From 5db04697683f51366c7937df94eb01d1df4a34c5 Mon Sep 17 00:00:00 2001
From: Ali Taghibakhshi
Date: Fri, 19 Jul 2024 16:08:55 -0700
Subject: [PATCH 1/2] minor fix tutorial

---
 tutorials/llm/mamba/mamba.rst | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tutorials/llm/mamba/mamba.rst b/tutorials/llm/mamba/mamba.rst
index 525be296730a..f7b73f9a13be 100644
--- a/tutorials/llm/mamba/mamba.rst
+++ b/tutorials/llm/mamba/mamba.rst
@@ -28,7 +28,9 @@ In order to proceed, ensure that you have met the following requirements:
 
 * A Docker-enabled environment, with `NVIDIA Container Runtime `_ installed, which will make the container GPU-aware.
 
-* `Authenticate with NVIDIA NGC `_, generate API KEY from `NGC `__, add the key to your credentials following instructions in `this guide `__, and get into NVIDIA NeMo dev container ``nvcr.io/nvidia/nemo:dev``.
+
+* `Authenticate with NVIDIA NGC `_, and download `NGC CLI Tool `_.
+
 
 Step-by-step Guide for Fine-Tuning
 ----------------------------------
@@ -51,13 +53,13 @@ Convert the Pytorch Checkpoint to a NeMo Checkpoint
 
 .. code:: bash
 
-   CUDA_VISIBLE_DEVICES="0" python /NeMo/scripts/checkpoint_converters/convert_mamba2_pyt_to_nemo.py \
+   CUDA_VISIBLE_DEVICES="0" python /opt/NeMo/scripts/checkpoint_converters/convert_mamba2_pyt_to_nemo.py \
    --input_name_or_path  \
    --output_path  \
-   --ngroups_mamba 8 \
+   --mamba_ssm_ngroups 8 \
    --precision bf16
 
-* Note: the ``ngroups_mamba`` parameter should be 1 for the Mamba2 models from the `Transformers are SSMs paper `__ (130m, 370m, 780m, 1.3b, and 2.7b) and 8 for the Mamba2 and Mamba2-Hybrid models by `NVIDIA `__ (both 8b).
+* Note: the ``mamba_ssm_ngroups`` parameter should be 1 for the Mamba2 models from the `Transformers are SSMs paper `__ (130m, 370m, 780m, 1.3b, and 2.7b) and 8 for the Mamba2 and Mamba2-Hybrid models by `NVIDIA `__ (both 8b).
 
 Model (Tensor) Parallelism for the 8b Models
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -106,8 +108,8 @@ Run Fine-Tuning
    export NVTE_FUSED_ATTN=1
    export NVTE_FLASH_ATTN=0
 
-   MASTER_PORT=15008 torchrun --nproc_per_node=${NUM_DEVICES}
-   /home/ataghibakhsh/NeMo/examples/nlp/language_modeling/tuning/megatron_mamba_finetuning.py \
+   torchrun --nproc_per_node=${NUM_DEVICES}
+   /opt/NeMo/examples/nlp/language_modeling/tuning/megatron_mamba_finetuning.py \
    --config-path=${CONFIG_PATH} \
    --config-name=${CONFIG_NAME} \
    trainer.devices=${NUM_DEVICES} \

From d6a1bba7e0f211d3407c57bebfc85c36672f3801 Mon Sep 17 00:00:00 2001
From: Ali Taghibakhshi
Date: Fri, 19 Jul 2024 16:10:26 -0700
Subject: [PATCH 2/2] minor fix tutorial

---
 tutorials/llm/mamba/mamba.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tutorials/llm/mamba/mamba.rst b/tutorials/llm/mamba/mamba.rst
index f7b73f9a13be..2ce5ee5f616b 100644
--- a/tutorials/llm/mamba/mamba.rst
+++ b/tutorials/llm/mamba/mamba.rst
@@ -29,8 +29,7 @@ In order to proceed, ensure that you have met the following requirements:
 * A Docker-enabled environment, with `NVIDIA Container Runtime `_ installed, which will make the container GPU-aware.
 
 
-* `Authenticate with NVIDIA NGC `_, and download `NGC CLI Tool `_.
-
+* `Authenticate with NVIDIA NGC `_, generate API KEY from `NGC `__, add the key to your credentials following instructions in `this guide `__, and get into NVIDIA NeMo dev container ``nvcr.io/nvidia/nemo:dev``.
 
 Step-by-step Guide for Fine-Tuning
 ----------------------------------
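
For anyone who wants to try this series locally, a minimal sketch follows; it assumes the two mails above were saved with ``git format-patch``-style names such as ``0001-minor-fix-tutorial.patch`` and ``0002-minor-fix-tutorial.patch`` (hypothetical filenames, not given in the thread) inside a NeMo checkout:

.. code:: bash

   # Apply both patches on top of the current branch (the filenames are assumptions).
   git am 0001-minor-fix-tutorial.patch 0002-minor-fix-tutorial.patch

   # Confirm the tutorial now references the renamed converter flag.
   grep -n "mamba_ssm_ngroups" tutorials/llm/mamba/mamba.rst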