Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# RL environment mix: a list of agent entries.
# This file declares a single entry, the Countdown agent.
- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent
  # Arguments for the agent named above.
  agent_args:
    dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4"
    split: "train"
  # NOTE(review): presumably the sampling weight of this entry within the
  # list; with a single entry any positive value should behave the same - confirm.
  weight: 1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
{
"lm loss": {
"start_step": 1,
"end_step": 50,
"step_interval": 1,
"values": {
"1": 0.00762,
"2": 0.0,
"3": 0.0,
"4": 0.0,
"5": 0.0,
"6": 0.05397,
"7": 0.01964,
"8": 0.0,
"9": 0.0,
"10": 0.0,
"11": 0.0,
"12": 0.0,
"13": 0.0,
"14": 0.0,
"15": 0.0,
"16": 0.02209,
"17": 0.0,
"18": 0.0,
"19": 0.0,
"20": 0.0,
"21": 0.0,
"22": 0.0,
"23": 0.0,
"24": 0.0,
"25": 0.0,
"26": 0.0,
"27": 0.0,
"28": 0.0,
"29": 0.0,
"30": 0.0,
"31": 0.0,
"32": 0.0,
"33": 0.0,
"34": 0.0,
"35": 0.0,
"36": 0.0,
"37": 0.0,
"38": 0.0,
"39": 0.0,
"40": 0.0,
"41": 0.0,
"42": 0.0,
"43": 0.0,
"44": 0.0,
"45": 0.0,
"46": 0.0,
"47": 0.0,
"48": 0.0,
"49": 0.0,
"50": 0.04447
}
},
"num-zeros": {
"start_step": 1,
"end_step": 50,
"step_interval": 1,
"values": {
"1": 58.0,
"2": 583687296.0,
"3": 583687296.0,
"4": 583687296.0,
"5": 583687296.0,
"6": 35.0,
"7": 52.0,
"8": 583687296.0,
"9": 583687296.0,
"10": 583687296.0,
"11": 583687296.0,
"12": 583687296.0,
"13": 583687296.0,
"14": 583687296.0,
"15": 583687296.0,
"16": 52.0,
"17": 583687296.0,
"18": 583687296.0,
"19": 583687296.0,
"20": 583687296.0,
"21": 583687296.0,
"22": 583687296.0,
"23": 583687296.0,
"24": 583687296.0,
"25": 583687296.0,
"26": 583687296.0,
"27": 583687296.0,
"28": 583687296.0,
"29": 583687296.0,
"30": 583687296.0,
"31": 583687296.0,
"32": 583687296.0,
"33": 583687296.0,
"34": 583687296.0,
"35": 583687296.0,
"36": 583687296.0,
"37": 583687296.0,
"38": 583687296.0,
"39": 583687296.0,
"40": 583687296.0,
"41": 583687296.0,
"42": 583687296.0,
"43": 583687296.0,
"44": 583687296.0,
"45": 583687296.0,
"46": 583687296.0,
"47": 583687296.0,
"48": 583687296.0,
"49": 583687296.0,
"50": 45.0
}
},
"mem-allocated-bytes": {
"start_step": 1,
"end_step": 50,
"step_interval": 1,
"values": {
"1": 55289954304.0,
"2": 55292747776.0,
"3": 55292731392.0,
"4": 55292891136.0,
"5": 55292878848.0,
"6": 55292878848.0,
"7": 55292878848.0,
"8": 55292788736.0,
"9": 55292788736.0,
"10": 55292788736.0,
"11": 55292792832.0,
"12": 55292792832.0,
"13": 55292792832.0,
"14": 55292792832.0,
"15": 55292792832.0,
"16": 55292796928.0,
"17": 55292796928.0,
"18": 55292801024.0,
"19": 55292805120.0,
"20": 55292801024.0,
"21": 55292801024.0,
"22": 55292796928.0,
"23": 55292801024.0,
"24": 55292796928.0,
"25": 55292801024.0,
"26": 55292796928.0,
"27": 55292796928.0,
"28": 55292801024.0,
"29": 55292801024.0,
"30": 55292805120.0,
"31": 55292805120.0,
"32": 55292805120.0,
"33": 55292805120.0,
"34": 55292805120.0,
"35": 55292805120.0,
"36": 55292805120.0,
"37": 55292801024.0,
"38": 55292801024.0,
"39": 55292801024.0,
"40": 55292805120.0,
"41": 55292805120.0,
"42": 55292805120.0,
"43": 55292801024.0,
"44": 55292796928.0,
"45": 55292801024.0,
"46": 55292801024.0,
"47": 55292801024.0,
"48": 55292801024.0,
"49": 55292805120.0,
"50": 55292805120.0
}
},
"mem-max-allocated-bytes": {
"start_step": 1,
"end_step": 50,
"step_interval": 1,
"values": {
"1": 55289958400.0,
"2": 57103880192.0,
"3": 57104392192.0,
"4": 57104416768.0,
"5": 57104416768.0,
"6": 57104416768.0,
"7": 57104416768.0,
"8": 57104416768.0,
"9": 57104416768.0,
"10": 57104416768.0,
"11": 57104416768.0,
"12": 57104416768.0,
"13": 57104416768.0,
"14": 57104416768.0,
"15": 57104416768.0,
"16": 57104416768.0,
"17": 57104416768.0,
"18": 57104416768.0,
"19": 57104416768.0,
"20": 57104416768.0,
"21": 57104416768.0,
"22": 57104416768.0,
"23": 57104416768.0,
"24": 57104416768.0,
"25": 57104416768.0,
"26": 57104416768.0,
"27": 57104416768.0,
"28": 57104416768.0,
"29": 57104416768.0,
"30": 57104416768.0,
"31": 57104416768.0,
"32": 57104416768.0,
"33": 57104416768.0,
"34": 57104416768.0,
"35": 57104416768.0,
"36": 57104416768.0,
"37": 57104416768.0,
"38": 57104416768.0,
"39": 57104416768.0,
"40": 57104416768.0,
"41": 57104416768.0,
"42": 57104416768.0,
"43": 57104416768.0,
"44": 57104416768.0,
"45": 57104416768.0,
"46": 57104416768.0,
"47": 57104416768.0,
"48": 57104416768.0,
"49": 57104416768.0,
"50": 57104416768.0
}
},
"iteration-time": {
"start_step": 1,
"end_step": 50,
"step_interval": 1,
"values": {
"1": 38.24908,
"2": 4.52458,
"3": 3.69393,
"4": 3.38577,
"5": 3.41862,
"6": 3.27421,
"7": 3.32023,
"8": 3.83723,
"9": 4.07373,
"10": 3.47799,
"11": 3.27499,
"12": 3.37017,
"13": 3.3918,
"14": 3.25114,
"15": 3.29905,
"16": 3.29943,
"17": 3.50383,
"18": 3.56844,
"19": 3.30276,
"20": 3.34553,
"21": 3.29165,
"22": 3.30348,
"23": 3.33814,
"24": 3.31525,
"25": 3.29337,
"26": 3.26119,
"27": 3.5167,
"28": 3.2312,
"29": 3.45063,
"30": 3.3088,
"31": 3.32522,
"32": 3.28154,
"33": 3.23551,
"34": 3.20003,
"35": 3.25844,
"36": 3.67071,
"37": 3.1881,
"38": 3.30757,
"39": 3.32895,
"40": 3.29602,
"41": 3.25522,
"42": 3.28932,
"43": 3.32204,
"44": 3.26419,
"45": 3.75371,
"46": 3.23126,
"47": 3.25929,
"48": 3.19512,
"49": 3.32815,
"50": 3.25617
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Functional-test configuration for a GRPO reinforcement-learning run
# (MODE: rl) with CUDA graphs enabled for training.

# Environment variables for the test process.
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
TEST_TYPE: frozen-start
MODE: rl
# Command-line arguments for the training entry point. Each key must appear
# only once: duplicate mapping keys are invalid YAML and parsers that accept
# them silently keep just one of the values.
MODEL_ARGS:
  # Tokenizer and checkpoint loading
  --tiktoken-pattern: v2
  --use-mcore-models: true
  --tokenizer-type: TikTokenizer
  --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json
  --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/
  --auto-detect-ckpt-format: true
  --max-tokens-to-oom: 3600000
  --inference-max-seq-length: 1024
  --attention-backend: flash
  --mock-data: true
  --no-load-optim: true
  --no-use-tokenizer-model-from-checkpoint-args: true
  --distributed-backend: nccl
  --log-interval: 1
  --log-progress: true
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --ckpt-format: torch_dist
  --bf16: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  # Model shape
  --num-layers: 24
  --hidden-size: 1152
  --num-attention-heads: 16
  --max-position-embeddings: 1024
  --seq-length: 1024
  --timing-log-option: minmax
  --log-throughput: true
  --no-create-attention-mask-in-dataloader: true
  --straggler-minmax-count: 16
  --tensorboard-log-interval: 1
  --empty-unused-memory-level: 2
  --langrl-inference-server-type: inplace_megatron
  --seed: 42
  --calculate-per-token-loss: true
  # RL rollout / sequence packing
  --rl-use-sequence-packing: true
  --rl-sequence-packing-algo: fifo
  --rl-offload-optimizer-during-inference: true
  # This key was previously listed twice (0, then 1); the later value is kept.
  --timing-log-level: 1
  --log-timers-to-tensorboard: true
  --cuda-graph-impl: local
  --micro-batch-size: 1
  --global-batch-size: 16
  # GRPO hyperparameters
  --grpo-group-size: 2
  --grpo-prompts-per-step: 8
  --grpo-iterations: 1
  --grpo-clamp-eps-lower: 0.2
  --grpo-clamp-eps-upper: 0.2
  --grpo-kl-beta: 0.0
  --grpo-entropy-term-weight: 0.0
  # NOTE(review): this path names the non-cudagraphs test directory
  # (gpt_grpo_tp1_pp1_dp8_583m_throughputtest); confirm that sharing that
  # test's env_config.yaml is intended rather than using a per-test copy.
  --langrl-env-config: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/env_config.yaml
  --rl-partial-rollouts: true
  # Optimizer and run control
  --lr: 0.000001
  --lr-warmup-samples: 0
  --clip-grad: 1.0
  --use-checkpoint-args: true
  --dist-ckpt-strictness: log_unexpected
  --perform-rl-step: true
  --train-samples: 48828125
  # NOTE(review): the accompanying golden values record steps 1-50 while this
  # stops the run at iteration 10 - confirm the two are meant to differ.
  --exit-interval: 10
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --save-interval: 1000000
  --eval-interval: 1000000
  --finetune: true
  --inference-logging-step-interval: 1
  --rl-training-cuda-graphs: true
5 changes: 5 additions & 0 deletions tests/test_utils/recipes/gpt-grpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ products:
- environment: [dev]
scope: [mr-broken]
platforms: [dgx_h100]
- test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest_cudagraphs]
products:
- environment: [dev]
scope: [mr]
platforms: [dgx_h100]
- test_case: [gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github]
products:
- environment: [dev]
Expand Down
Loading