diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/env_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/env_config.yaml
new file mode 100644
index 00000000000..329246987bf
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/env_config.yaml
@@ -0,0 +1,5 @@
+- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent
+  agent_args:
+    dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4"
+    split: "train"
+  weight: 1.0
diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..78e76d55735
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/golden_values_dev_dgx_h100.json
@@ -0,0 +1,287 @@
+{
+    "lm loss": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 0.00762,
+            "2": 0.0,
+            "3": 0.0,
+            "4": 0.0,
+            "5": 0.0,
+            "6": 0.05397,
+            "7": 0.01964,
+            "8": 0.0,
+            "9": 0.0,
+            "10": 0.0,
+            "11": 0.0,
+            "12": 0.0,
+            "13": 0.0,
+            "14": 0.0,
+            "15": 0.0,
+            "16": 0.02209,
+            "17": 0.0,
+            "18": 0.0,
+            "19": 0.0,
+            "20": 0.0,
+            "21": 0.0,
+            "22": 0.0,
+            "23": 0.0,
+            "24": 0.0,
+            "25": 0.0,
+            "26": 0.0,
+            "27": 0.0,
+            "28": 0.0,
+            "29": 0.0,
+            "30": 0.0,
+            "31": 0.0,
+            "32": 0.0,
+            "33": 0.0,
+            "34": 0.0,
+            "35": 0.0,
+            "36": 0.0,
+            "37": 0.0,
+            "38": 0.0,
+            "39": 0.0,
+            "40": 0.0,
+            "41": 0.0,
+            "42": 0.0,
+            "43": 0.0,
+            "44": 0.0,
+            "45": 0.0,
+            "46": 0.0,
+            "47": 0.0,
+            "48": 0.0,
+            "49": 0.0,
+            "50": 0.04447
+        }
+    },
+    "num-zeros": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 58.0,
+            "2": 583687296.0,
+            "3": 583687296.0,
+            "4": 583687296.0,
+            "5": 583687296.0,
+            "6": 35.0,
+            "7": 52.0,
+            "8": 583687296.0,
+            "9": 583687296.0,
+            "10": 583687296.0,
+            "11": 583687296.0,
+            "12": 583687296.0,
+            "13": 583687296.0,
+            "14": 583687296.0,
+            "15": 583687296.0,
+            "16": 52.0,
+            "17": 583687296.0,
+            "18": 583687296.0,
+            "19": 583687296.0,
+            "20": 583687296.0,
+            "21": 583687296.0,
+            "22": 583687296.0,
+            "23": 583687296.0,
+            "24": 583687296.0,
+            "25": 583687296.0,
+            "26": 583687296.0,
+            "27": 583687296.0,
+            "28": 583687296.0,
+            "29": 583687296.0,
+            "30": 583687296.0,
+            "31": 583687296.0,
+            "32": 583687296.0,
+            "33": 583687296.0,
+            "34": 583687296.0,
+            "35": 583687296.0,
+            "36": 583687296.0,
+            "37": 583687296.0,
+            "38": 583687296.0,
+            "39": 583687296.0,
+            "40": 583687296.0,
+            "41": 583687296.0,
+            "42": 583687296.0,
+            "43": 583687296.0,
+            "44": 583687296.0,
+            "45": 583687296.0,
+            "46": 583687296.0,
+            "47": 583687296.0,
+            "48": 583687296.0,
+            "49": 583687296.0,
+            "50": 45.0
+        }
+    },
+    "mem-allocated-bytes": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 55289954304.0,
+            "2": 55292747776.0,
+            "3": 55292731392.0,
+            "4": 55292891136.0,
+            "5": 55292878848.0,
+            "6": 55292878848.0,
+            "7": 55292878848.0,
+            "8": 55292788736.0,
+            "9": 55292788736.0,
+            "10": 55292788736.0,
+            "11": 55292792832.0,
+            "12": 55292792832.0,
+            "13": 55292792832.0,
+            "14": 55292792832.0,
+            "15": 55292792832.0,
+            "16": 55292796928.0,
+            "17": 55292796928.0,
+            "18": 55292801024.0,
+            "19": 55292805120.0,
+            "20": 55292801024.0,
+            "21": 55292801024.0,
+            "22": 55292796928.0,
+            "23": 55292801024.0,
+            "24": 55292796928.0,
+            "25": 55292801024.0,
+            "26": 55292796928.0,
+            "27": 55292796928.0,
+            "28": 55292801024.0,
+            "29": 55292801024.0,
+            "30": 55292805120.0,
+            "31": 55292805120.0,
+            "32": 55292805120.0,
+            "33": 55292805120.0,
+            "34": 55292805120.0,
+            "35": 55292805120.0,
+            "36": 55292805120.0,
+            "37": 55292801024.0,
+            "38": 55292801024.0,
+            "39": 55292801024.0,
+            "40": 55292805120.0,
+            "41": 55292805120.0,
+            "42": 55292805120.0,
+            "43": 55292801024.0,
+            "44": 55292796928.0,
+            "45": 55292801024.0,
+            "46": 55292801024.0,
+            "47": 55292801024.0,
+            "48": 55292801024.0,
+            "49": 55292805120.0,
+            "50": 55292805120.0
+        }
+    },
+    "mem-max-allocated-bytes": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 55289958400.0,
+            "2": 57103880192.0,
+            "3": 57104392192.0,
+            "4": 57104416768.0,
+            "5": 57104416768.0,
+            "6": 57104416768.0,
+            "7": 57104416768.0,
+            "8": 57104416768.0,
+            "9": 57104416768.0,
+            "10": 57104416768.0,
+            "11": 57104416768.0,
+            "12": 57104416768.0,
+            "13": 57104416768.0,
+            "14": 57104416768.0,
+            "15": 57104416768.0,
+            "16": 57104416768.0,
+            "17": 57104416768.0,
+            "18": 57104416768.0,
+            "19": 57104416768.0,
+            "20": 57104416768.0,
+            "21": 57104416768.0,
+            "22": 57104416768.0,
+            "23": 57104416768.0,
+            "24": 57104416768.0,
+            "25": 57104416768.0,
+            "26": 57104416768.0,
+            "27": 57104416768.0,
+            "28": 57104416768.0,
+            "29": 57104416768.0,
+            "30": 57104416768.0,
+            "31": 57104416768.0,
+            "32": 57104416768.0,
+            "33": 57104416768.0,
+            "34": 57104416768.0,
+            "35": 57104416768.0,
+            "36": 57104416768.0,
+            "37": 57104416768.0,
+            "38": 57104416768.0,
+            "39": 57104416768.0,
+            "40": 57104416768.0,
+            "41": 57104416768.0,
+            "42": 57104416768.0,
+            "43": 57104416768.0,
+            "44": 57104416768.0,
+            "45": 57104416768.0,
+            "46": 57104416768.0,
+            "47": 57104416768.0,
+            "48": 57104416768.0,
+            "49": 57104416768.0,
+            "50": 57104416768.0
+        }
+    },
+    "iteration-time": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 38.24908,
+            "2": 4.52458,
+            "3": 3.69393,
+            "4": 3.38577,
+            "5": 3.41862,
+            "6": 3.27421,
+            "7": 3.32023,
+            "8": 3.83723,
+            "9": 4.07373,
+            "10": 3.47799,
+            "11": 3.27499,
+            "12": 3.37017,
+            "13": 3.3918,
+            "14": 3.25114,
+            "15": 3.29905,
+            "16": 3.29943,
+            "17": 3.50383,
+            "18": 3.56844,
+            "19": 3.30276,
+            "20": 3.34553,
+            "21": 3.29165,
+            "22": 3.30348,
+            "23": 3.33814,
+            "24": 3.31525,
+            "25": 3.29337,
+            "26": 3.26119,
+            "27": 3.5167,
+            "28": 3.2312,
+            "29": 3.45063,
+            "30": 3.3088,
+            "31": 3.32522,
+            "32": 3.28154,
+            "33": 3.23551,
+            "34": 3.20003,
+            "35": 3.25844,
+            "36": 3.67071,
+            "37": 3.1881,
+            "38": 3.30757,
+            "39": 3.32895,
+            "40": 3.29602,
+            "41": 3.25522,
+            "42": 3.28932,
+            "43": 3.32204,
+            "44": 3.26419,
+            "45": 3.75371,
+            "46": 3.23126,
+            "47": 3.25929,
+            "48": 3.19512,
+            "49": 3.32815,
+            "50": 3.25617
+        }
+    }
+}
diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/model_config.yaml
new file mode 100644
index 00000000000..c8f355db2b4
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/model_config.yaml
@@ -0,0 +1,81 @@
+# Launch configuration for the gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs
+# functional test case.
+ENV_VARS:
+  CUDA_DEVICE_MAX_CONNECTIONS: 1
+  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+  NCCL_ALGO: Ring
+  CUBLAS_WORKSPACE_CONFIG: :4096:8
+TEST_TYPE: frozen-start
+MODE: rl
+MODEL_ARGS:
+  --tiktoken-pattern: v2
+  --use-mcore-models: true
+  --tokenizer-type: TikTokenizer
+  --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json
+  --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/
+  --auto-detect-ckpt-format: true
+  --max-tokens-to-oom: 3600000
+  --inference-max-seq-length: 1024
+  --attention-backend: flash
+  --mock-data: true
+  --micro-batch-size: 1
+  --no-load-optim: true
+  --no-use-tokenizer-model-from-checkpoint-args: true
+  --distributed-backend: nccl
+  --log-interval: 1
+  --log-progress: true
+  --transformer-impl: transformer_engine
+  --tensor-model-parallel-size: 1
+  --pipeline-model-parallel-size: 1
+  --ckpt-format: torch_dist
+  --bf16: true
+  --log-memory-to-tensorboard: true
+  --log-num-zeros-in-grad: true
+  --log-validation-ppl-to-tensorboard: true
+  --log-timers-to-tensorboard: true
+  --num-layers: 24
+  --hidden-size: 1152
+  --num-attention-heads: 16
+  --max-position-embeddings: 1024
+  --seq-length: 1024
+  --timing-log-option: minmax
+  --log-throughput: true
+  --no-create-attention-mask-in-dataloader: true
+  --straggler-minmax-count: 16
+  --tensorboard-log-interval: 1
+  --empty-unused-memory-level: 2
+  --langrl-inference-server-type: inplace_megatron
+  --seed: 42
+  --calculate-per-token-loss: true
+  --rl-use-sequence-packing: true
+  --rl-sequence-packing-algo: fifo
+  --rl-offload-optimizer-during-inference: true
+  --timing-log-level: 1
+  --cuda-graph-impl: local
+  --global-batch-size: 16
+  --grpo-group-size: 2
+  --grpo-prompts-per-step: 8
+  --grpo-iterations: 1
+  --grpo-clamp-eps-lower: 0.2
+  --grpo-clamp-eps-upper: 0.2
+  --grpo-kl-beta: 0.0
+  --grpo-entropy-term-weight: 0.0
+  # Use the env_config.yaml that lives in this test case's own directory.
+  --langrl-env-config: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs/env_config.yaml
+  --rl-partial-rollouts: true
+  --lr: 0.000001
+  --lr-warmup-samples: 0
+  --clip-grad: 1.0
+  --use-checkpoint-args: true
+  --dist-ckpt-strictness: log_unexpected
+  --perform-rl-step: true
+  --train-samples: 48828125
+  # NOTE(review): golden_values_dev_dgx_h100.json for this test case records
+  # steps 1-50; confirm that stopping at iteration 10 is intended.
+  --exit-interval: 10
+  --tensorboard-dir: ${TENSORBOARD_PATH}
+  --save-interval: 1000000
+  --eval-interval: 1000000
+  --finetune: true
+  --inference-logging-step-interval: 1
+  --rl-training-cuda-graphs: true
diff --git a/tests/test_utils/recipes/gpt-grpo.yaml b/tests/test_utils/recipes/gpt-grpo.yaml
index 11e8eadea9b..13bc5bf3a29 100644
--- a/tests/test_utils/recipes/gpt-grpo.yaml
+++ b/tests/test_utils/recipes/gpt-grpo.yaml
@@ -69,6 +69,11 @@ products:
       - environment: [dev]
         scope: [mr-broken]
         platforms: [dgx_h100]
+  - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs]
+    products:
+      - environment: [dev]
+        scope: [mr]
+        platforms: [dgx_h100]
   - test_case: [gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github]
     products:
       - environment: [dev]