From 2442083bda4d2469a57839d22640e3a02fba53ee Mon Sep 17 00:00:00 2001
From: Parth Mannan <pmannan@nvidia.com>
Date: Sun, 22 Feb 2026 13:18:10 -0800
Subject: [PATCH 1/2] Update moe_token_dispatcher_type default to alltoall

Signed-off-by: Parth Mannan <pmannan@nvidia.com>
---
 examples/configs/distillation_math.yaml                       | 2 +-
 examples/configs/distillation_math_megatron.yaml              | 2 +-
 examples/configs/dpo.yaml                                     | 2 +-
 examples/configs/grpo_math_1B.yaml                            | 2 +-
 examples/configs/grpo_math_1B_megatron.yaml                   | 2 +-
 examples/configs/sft.yaml                                     | 2 +-
 examples/configs/sft_openmathinstruct2_megatron.yaml          | 2 +-
 examples/configs/vlm_grpo_3B.yaml                             | 2 +-
 examples/configs/vlm_grpo_3B_megatron.yaml                    | 2 +-
 .../grpo_workplace_assistant_nemotron_nano_v2_9b.yaml         | 2 +-
 nemo_rl/models/policy/__init__.py                             | 2 +-
 tests/unit/models/megatron/test_megatron_setup.py             | 4 ++--
 tests/unit/models/policy/test_megatron_worker.py              | 2 +-
 13 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/examples/configs/distillation_math.yaml b/examples/configs/distillation_math.yaml
index bb3743a4fc..b32664fa63 100644
--- a/examples/configs/distillation_math.yaml
+++ b/examples/configs/distillation_math.yaml
@@ -109,7 +109,7 @@ policy: &POLICY_BASE
         defer_fp32_logits: False
         moe_per_layer_logging: False
         moe_enable_deepep: false
-        moe_token_dispatcher_type: "allgather"
+        moe_token_dispatcher_type: "alltoall"
         moe_shared_expert_overlap: false
         
         optimizer:
diff --git a/examples/configs/distillation_math_megatron.yaml b/examples/configs/distillation_math_megatron.yaml
index 76151678f1..e6eafa67e3 100644
--- a/examples/configs/distillation_math_megatron.yaml
+++ b/examples/configs/distillation_math_megatron.yaml
@@ -60,7 +60,7 @@ policy: &POLICY_BASE
         moe_per_layer_logging: False
         defer_fp32_logits: False
         moe_enable_deepep: false
-        moe_token_dispatcher_type: "allgather"
+        moe_token_dispatcher_type: "alltoall"
         moe_shared_expert_overlap: false
         
         optimizer:
diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml
index fec83a6199..ddeb0fa7b8 100755
--- a/examples/configs/dpo.yaml
+++ b/examples/configs/dpo.yaml
@@ -133,7 +133,7 @@ policy:
     defer_fp32_logits: False
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
     
     optimizer:
diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index 740f9ad24b..3e5ada5761 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -144,7 +144,7 @@ policy:
     defer_fp32_logits: False
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
 
     optimizer:
diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml
index b240c6519c..f328fe016c 100644
--- a/examples/configs/grpo_math_1B_megatron.yaml
+++ b/examples/configs/grpo_math_1B_megatron.yaml
@@ -96,7 +96,7 @@ policy:
     moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo
     moe_permute_fusion: false
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
     #gives ~20% training perf speedup with sequence packing 
     apply_rope_fusion: True
diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml
index 7b90a90c38..821da4e530 100644
--- a/examples/configs/sft.yaml
+++ b/examples/configs/sft.yaml
@@ -115,7 +115,7 @@ policy:
     defer_fp32_logits: False
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
 
     peft:
diff --git a/examples/configs/sft_openmathinstruct2_megatron.yaml b/examples/configs/sft_openmathinstruct2_megatron.yaml
index 40f62473ac..e0748a1479 100644
--- a/examples/configs/sft_openmathinstruct2_megatron.yaml
+++ b/examples/configs/sft_openmathinstruct2_megatron.yaml
@@ -94,7 +94,7 @@ policy:
     bias_activation_fusion: True
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
 
     env_vars:
diff --git a/examples/configs/vlm_grpo_3B.yaml b/examples/configs/vlm_grpo_3B.yaml
index 4cad631c85..3e2d3b5597 100644
--- a/examples/configs/vlm_grpo_3B.yaml
+++ b/examples/configs/vlm_grpo_3B.yaml
@@ -114,7 +114,7 @@ policy:
     defer_fp32_logits: False
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
 
     optimizer:
diff --git a/examples/configs/vlm_grpo_3B_megatron.yaml b/examples/configs/vlm_grpo_3B_megatron.yaml
index 336d97d79b..63a12a226f 100644
--- a/examples/configs/vlm_grpo_3B_megatron.yaml
+++ b/examples/configs/vlm_grpo_3B_megatron.yaml
@@ -156,7 +156,7 @@ policy:
     defer_fp32_logits: False
     moe_per_layer_logging: False
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
     optimizer:
       optimizer: adam
diff --git a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
index a923f842b7..5de09a4a3a 100644
--- a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
+++ b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
@@ -111,7 +111,7 @@ policy:
     defer_fp32_logits: false
     moe_permute_fusion: false
     moe_enable_deepep: false
-    moe_token_dispatcher_type: "allgather"
+    moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
 
     optimizer:
diff --git a/nemo_rl/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py
index 363399cbca..f1b405347d 100644
--- a/nemo_rl/models/policy/__init__.py
+++ b/nemo_rl/models/policy/__init__.py
@@ -187,7 +187,7 @@ class MegatronConfig(TypedDict):
     # Must set moe_token_dispatcher_type to 'flex'
     # Must set moe_shared_expert_overlap to False
     moe_enable_deepep: bool
-    # The type of token dispatcher to use. The default is 'allgather'.
+    # The type of token dispatcher to use. The default is 'alltoall'.
     # Options are 'allgather','alltoall' and 'flex'
     # Use 'flex' when using DeepEP
     moe_token_dispatcher_type: str
diff --git a/tests/unit/models/megatron/test_megatron_setup.py b/tests/unit/models/megatron/test_megatron_setup.py
index 16d77389a6..7b2a5d3622 100644
--- a/tests/unit/models/megatron/test_megatron_setup.py
+++ b/tests/unit/models/megatron/test_megatron_setup.py
@@ -187,7 +187,7 @@ def test_moe_configuration(self):
                 "moe_router_bias_update_rate": 0.0,
                 "moe_permute_fusion": True,
                 "moe_enable_deepep": False,
-                "moe_token_dispatcher_type": "allgather",
+                "moe_token_dispatcher_type": "alltoall",
                 "moe_shared_expert_overlap": True,
             }
         }
@@ -201,7 +201,7 @@ def test_moe_configuration(self):
         assert model_cfg.moe_router_bias_update_rate == 0.0
         assert model_cfg.moe_permute_fusion is True
         assert model_cfg.moe_enable_deepep is False
-        assert model_cfg.moe_token_dispatcher_type == "allgather"
+        assert model_cfg.moe_token_dispatcher_type == "alltoall"
         assert model_cfg.moe_shared_expert_overlap is True
 
 
diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py
index 7d329ab411..2fbd16fc3c 100644
--- a/tests/unit/models/policy/test_megatron_worker.py
+++ b/tests/unit/models/policy/test_megatron_worker.py
@@ -135,7 +135,7 @@ def create_megatron_test_config(
             "bias_activation_fusion": True,
             "moe_per_layer_logging": False,
             "moe_enable_deepep": False,
-            "moe_token_dispatcher_type": "allgather",
+            "moe_token_dispatcher_type": "alltoall",
             "moe_shared_expert_overlap": False,
             "defer_fp32_logits": defer_fp32_logits,
             "train_iters": 100,  # Required for Megatron training

From 493ddefe319073b24e757660125ee7facadabc35 Mon Sep 17 00:00:00 2001
From: Parth Mannan <pmannan@nvidia.com>
Date: Sun, 22 Feb 2026 13:21:00 -0800
Subject: [PATCH 2/2] Update moe_token_dispatcher_type to alltoall in vllm generation test

Signed-off-by: Parth Mannan <pmannan@nvidia.com>
---
 tests/unit/models/generation/test_vllm_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index c27a183b5c..3c06df8a80 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -194,7 +194,7 @@ def get_basic_megatron_test_config(
             "moe_router_bias_update_rate": 0.0,
             "moe_permute_fusion": False,
             "moe_enable_deepep": False,
-            "moe_token_dispatcher_type": "allgather",
+            "moe_token_dispatcher_type": "alltoall",
             "moe_shared_expert_overlap": False,
             "apply_rope_fusion": True,
             "bias_activation_fusion": True,