From 0f7d222922e122cd83ccb8ab4cd4bc6fd97b8c36 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis Date: Thu, 22 May 2025 00:37:36 -0700 Subject: [PATCH] restructure Signed-off-by: Alexandros Koumparoulis --- {nemo_lm/automodel => automodel}/__init__.py | 0 .../automodel => automodel}/base_recipe.py | 0 .../automodel => automodel}/config/loader.py | 0 .../datasets/hellaswag.py | 2 +- .../datasets/hf_dataset.py | 2 +- .../automodel => automodel}/datasets/utils.py | 0 .../automodel => automodel}/loggers/_wandb.py | 2 +- .../loggers/log_utils.py | 0 .../automodel => automodel}/loss/__init__.py | 0 .../automodel => automodel}/loss/chunked_ce.py | 0 .../automodel => automodel}/loss/linear_ce.py | 2 +- .../automodel => automodel}/loss/masked_ce.py | 0 .../models/auto_model_for_causal_lm.py | 8 ++++---- .../auto_model_for_image_text_to_text.py | 0 .../automodel => automodel}/optim/scheduler.py | 2 +- {nemo_lm => automodel/training}/__init__.py | 0 .../training/checkpoint.py | 14 +++++++------- .../training/checkpoint_utils.py | 2 +- .../automodel => automodel}/training/config.py | 16 ++++++++-------- .../training/finetune.py | 14 +++++++------- .../training/init_utils.py | 3 ++- .../training/model_utils.py | 2 +- .../automodel => automodel}/training/rng.py | 2 +- .../automodel => automodel}/training/state.py | 8 ++++---- .../automodel => automodel}/training/timers.py | 2 +- .../training/train_utils.py | 6 +++--- .../training => automodel/utils}/__init__.py | 0 .../utils/config_utils.py | 4 ++-- .../utils/dist_utils.py | 2 +- .../utils/import_utils.py | 0 .../automodel => automodel}/utils/sig_utils.py | 2 +- .../utils/yaml_utils.py | 0 nemo_lm/automodel/utils/__init__.py | 0 recipes/{automodel_finetune.py => finetune.py} | 6 +++--- recipes/llama_3_2_1b_hellaswag.yaml | 18 +++++++++--------- 35 files changed, 60 insertions(+), 59 deletions(-) rename {nemo_lm/automodel => automodel}/__init__.py (100%) rename {nemo_lm/automodel => automodel}/base_recipe.py (100%) rename {nemo_lm/automodel => automodel}/config/loader.py (100%) rename {nemo_lm/automodel => automodel}/datasets/hellaswag.py (91%) rename {nemo_lm/automodel => automodel}/datasets/hf_dataset.py (99%) rename {nemo_lm/automodel => automodel}/datasets/utils.py (100%) rename {nemo_lm/automodel => automodel}/loggers/_wandb.py (98%) rename {nemo_lm/automodel => automodel}/loggers/log_utils.py (100%) rename {nemo_lm/automodel => automodel}/loss/__init__.py (100%) rename {nemo_lm/automodel => automodel}/loss/chunked_ce.py (100%) rename {nemo_lm/automodel => automodel}/loss/linear_ce.py (99%) rename {nemo_lm/automodel => automodel}/loss/masked_ce.py (100%) rename {nemo_lm/automodel => automodel}/models/auto_model_for_causal_lm.py (98%) rename {nemo_lm/automodel => automodel}/models/auto_model_for_image_text_to_text.py (100%) rename {nemo_lm/automodel => automodel}/optim/scheduler.py (99%) rename {nemo_lm => automodel/training}/__init__.py (100%) rename {nemo_lm/automodel => automodel}/training/checkpoint.py (97%) rename {nemo_lm/automodel => automodel}/training/checkpoint_utils.py (98%) rename {nemo_lm/automodel => automodel}/training/config.py (96%) rename {nemo_lm/automodel => automodel}/training/finetune.py (98%) rename {nemo_lm/automodel => automodel}/training/init_utils.py (97%) rename {nemo_lm/automodel => automodel}/training/model_utils.py (99%) rename {nemo_lm/automodel => automodel}/training/rng.py (96%) rename {nemo_lm/automodel => automodel}/training/state.py (97%) rename {nemo_lm/automodel => automodel}/training/timers.py (99%) rename {nemo_lm/automodel => automodel}/training/train_utils.py (98%) rename {nemo_lm/automodel/training => automodel/utils}/__init__.py (100%) rename {nemo_lm/automodel => automodel}/utils/config_utils.py (97%) rename {nemo_lm/automodel => automodel}/utils/dist_utils.py (99%) rename {nemo_lm/automodel => automodel}/utils/import_utils.py (100%) rename {nemo_lm/automodel => automodel}/utils/sig_utils.py (98%) rename {nemo_lm/automodel => automodel}/utils/yaml_utils.py (100%) delete mode 100644 nemo_lm/automodel/utils/__init__.py rename recipes/{automodel_finetune.py => finetune.py} (97%) diff --git a/nemo_lm/automodel/__init__.py b/automodel/__init__.py similarity index 100% rename from nemo_lm/automodel/__init__.py rename to automodel/__init__.py diff --git a/nemo_lm/automodel/base_recipe.py b/automodel/base_recipe.py similarity index 100% rename from nemo_lm/automodel/base_recipe.py rename to automodel/base_recipe.py diff --git a/nemo_lm/automodel/config/loader.py b/automodel/config/loader.py similarity index 100% rename from nemo_lm/automodel/config/loader.py rename to automodel/config/loader.py diff --git a/nemo_lm/automodel/datasets/hellaswag.py b/automodel/datasets/hellaswag.py similarity index 91% rename from nemo_lm/automodel/datasets/hellaswag.py rename to automodel/datasets/hellaswag.py index 8ec26396da..6e728f75aa 100644 --- a/nemo_lm/automodel/datasets/hellaswag.py +++ b/automodel/datasets/hellaswag.py @@ -4,7 +4,7 @@ import datasets from datasets import load_dataset -from nemo_lm.automodel.datasets.utils import SFTSingleTurnPreprocessor +from automodel.datasets.utils import SFTSingleTurnPreprocessor class HellaSwag: def __init__(self, path_or_dataset, tokenizer, split): diff --git a/nemo_lm/automodel/datasets/hf_dataset.py b/automodel/datasets/hf_dataset.py similarity index 99% rename from nemo_lm/automodel/datasets/hf_dataset.py rename to automodel/datasets/hf_dataset.py index 963d6e9a94..b0209c2899 100644 --- a/nemo_lm/automodel/datasets/hf_dataset.py +++ b/automodel/datasets/hf_dataset.py @@ -9,7 +9,7 @@ from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from nemo_lm.automodel.utils.common_utils import log_single_rank +from automodel.utils.common_utils import log_single_rank logger = logging.getLogger(__name__) diff --git a/nemo_lm/automodel/datasets/utils.py b/automodel/datasets/utils.py similarity index 100% rename from nemo_lm/automodel/datasets/utils.py rename to automodel/datasets/utils.py diff --git a/nemo_lm/automodel/loggers/_wandb.py b/automodel/loggers/_wandb.py similarity index 98% rename from nemo_lm/automodel/loggers/_wandb.py rename to automodel/loggers/_wandb.py index b0523011ab..dbccb3dbc4 100644 --- a/nemo_lm/automodel/loggers/_wandb.py +++ b/automodel/loggers/_wandb.py @@ -15,7 +15,7 @@ from pathlib import Path from typing import Any, Optional -from nemo_lm.automodel.utils.common_utils import print_rank_last +from automodel.utils.common_utils import print_rank_last def on_save_checkpoint_success( diff --git a/nemo_lm/automodel/loggers/log_utils.py b/automodel/loggers/log_utils.py similarity index 100% rename from nemo_lm/automodel/loggers/log_utils.py rename to automodel/loggers/log_utils.py diff --git a/nemo_lm/automodel/loss/__init__.py b/automodel/loss/__init__.py similarity index 100% rename from nemo_lm/automodel/loss/__init__.py rename to automodel/loss/__init__.py diff --git a/nemo_lm/automodel/loss/chunked_ce.py b/automodel/loss/chunked_ce.py similarity index 100% rename from nemo_lm/automodel/loss/chunked_ce.py rename to automodel/loss/chunked_ce.py diff --git a/nemo_lm/automodel/loss/linear_ce.py b/automodel/loss/linear_ce.py similarity index 99% rename from nemo_lm/automodel/loss/linear_ce.py rename to automodel/loss/linear_ce.py index 363f5da7dc..7888f56bdb 100644 --- a/nemo_lm/automodel/loss/linear_ce.py +++ b/automodel/loss/linear_ce.py @@ -62,7 +62,7 @@ import torch -from nemo_lm.automodel.utils.import_utils import safe_import_from +from automodel.utils.import_utils import safe_import_from linear_cross_entropy, HAVE_LINEAR_LOSS_CE = safe_import_from( "cut_cross_entropy", diff --git a/nemo_lm/automodel/loss/masked_ce.py b/automodel/loss/masked_ce.py similarity index 100% rename from nemo_lm/automodel/loss/masked_ce.py rename to automodel/loss/masked_ce.py diff --git a/nemo_lm/automodel/models/auto_model_for_causal_lm.py b/automodel/models/auto_model_for_causal_lm.py similarity index 98% rename from nemo_lm/automodel/models/auto_model_for_causal_lm.py rename to automodel/models/auto_model_for_causal_lm.py index 131f30fb3f..afbbb92f85 100644 --- a/nemo_lm/automodel/models/auto_model_for_causal_lm.py +++ b/automodel/models/auto_model_for_causal_lm.py @@ -20,11 +20,11 @@ import torch.distributed as dist from transformers import AutoModelForCausalLM, BitsAndBytesConfig -from nemo_lm.automodel.utils.dist_utils import FirstRankPerNode -from nemo_lm.automodel.loss import masked_cross_entropy -from nemo_lm.automodel.loss.linear_ce import HAVE_LINEAR_LOSS_CE, fused_linear_cross_entropy +from automodel.utils.dist_utils import FirstRankPerNode +from automodel.loss import masked_cross_entropy +from automodel.loss.linear_ce import HAVE_LINEAR_LOSS_CE, fused_linear_cross_entropy # from nemo.utils import logging -from nemo_lm.automodel.utils.import_utils import safe_import +from automodel.utils.import_utils import safe_import @torch.no_grad() diff --git a/nemo_lm/automodel/models/auto_model_for_image_text_to_text.py b/automodel/models/auto_model_for_image_text_to_text.py similarity index 100% rename from nemo_lm/automodel/models/auto_model_for_image_text_to_text.py rename to automodel/models/auto_model_for_image_text_to_text.py diff --git a/nemo_lm/automodel/optim/scheduler.py b/automodel/optim/scheduler.py similarity index 99% rename from nemo_lm/automodel/optim/scheduler.py rename to automodel/optim/scheduler.py index 610cd9dad1..df142feb3d 100644 --- a/nemo_lm/automodel/optim/scheduler.py +++ b/automodel/optim/scheduler.py @@ -8,7 +8,7 @@ from torch.optim.optimizer import Optimizer -from nemo_lm.automodel.utils.common_utils import log_single_rank +from automodel.utils.common_utils import log_single_rank logger = logging.getLogger(__name__) diff --git a/nemo_lm/__init__.py b/automodel/training/__init__.py similarity index 100% rename from nemo_lm/__init__.py rename to automodel/training/__init__.py diff --git a/nemo_lm/automodel/training/checkpoint.py b/automodel/training/checkpoint.py similarity index 97% rename from nemo_lm/automodel/training/checkpoint.py rename to automodel/training/checkpoint.py index 8581221c73..c5d25309fb 100644 --- a/nemo_lm/automodel/training/checkpoint.py +++ b/automodel/training/checkpoint.py @@ -24,11 +24,11 @@ import torch from torch.nn import Module -# from nemo_lm.automodel.components.state import GlobalState, TrainState -# from nemo_lm.automodel.config import ConfigContainer -# from nemo_lm.automodel.utils.model_utils import unwrap_model -# from nemo_lm.automodel.utils import wandb_utils -from nemo_lm.automodel.training.checkpoint_utils import ( +# from automodel.components.state import GlobalState, TrainState +# from automodel.config import ConfigContainer +# from automodel.utils.model_utils import unwrap_model +# from automodel.utils import wandb_utils +from automodel.training.checkpoint_utils import ( TRACKER_PREFIX, checkpoint_exists, get_checkpoint_run_config_filename, @@ -36,8 +36,8 @@ read_run_config, read_train_state, ) -# from nemo_lm.automodel.utils.checkpoint_utils import TRAIN_STATE_FILE -from nemo_lm.automodel.utils.dist_utils import ( +# from automodel.utils.checkpoint_utils import TRAIN_STATE_FILE +from automodel.utils.dist_utils import ( get_local_rank_preinit, get_rank_safe, get_world_size_safe diff --git a/nemo_lm/automodel/training/checkpoint_utils.py b/automodel/training/checkpoint_utils.py similarity index 98% rename from nemo_lm/automodel/training/checkpoint_utils.py rename to automodel/training/checkpoint_utils.py index 8bf5c9e4e4..76cca849ef 100644 --- a/nemo_lm/automodel/training/checkpoint_utils.py +++ b/automodel/training/checkpoint_utils.py @@ -7,7 +7,7 @@ import torch import yaml -from nemo_lm.automodel.utils.dist_utils import ( +from automodel.utils.dist_utils import ( get_local_rank_preinit, get_rank_safe, get_world_size_safe diff --git a/nemo_lm/automodel/training/config.py b/automodel/training/config.py similarity index 96% rename from nemo_lm/automodel/training/config.py rename to automodel/training/config.py index e097f7c196..b4eca0e68f 100644 --- a/nemo_lm/automodel/training/config.py +++ b/automodel/training/config.py @@ -22,11 +22,11 @@ from torch.nn.parallel import DistributedDataParallel from transformers import AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig -from nemo_lm.automodel.components.data.hf_dataset import HFDatasetBuilder -from nemo_lm.automodel.components.loss.linear_ce import HAVE_LINEAR_LOSS_CE -from nemo_lm.automodel.components.loss.masked_ce import masked_cross_entropy -from nemo_lm.automodel.components.scheduler import OptimizerParamScheduler -from nemo_lm.automodel.utils.model_utils import JitConfig, TEConfig, jit_compile_model, te_accelerate +from automodel.components.data.hf_dataset import HFDatasetBuilder +from automodel.components.loss.linear_ce import HAVE_LINEAR_LOSS_CE +from automodel.components.loss.masked_ce import masked_cross_entropy +from automodel.components.scheduler import OptimizerParamScheduler +from automodel.utils.model_utils import JitConfig, TEConfig, jit_compile_model, te_accelerate from nemo_lm.config.common import ( DistributedInitConfig, LoggerConfig, @@ -34,9 +34,9 @@ RNGConfig, TrainingConfig, ) -from nemo_lm.automodel.utils.common_utils import get_rank_safe, get_world_size_safe -from nemo_lm.automodel.utils.config_utils import ConfigContainer as Container -from nemo_lm.automodel.utils.import_utils import safe_import +from automodel.utils.common_utils import get_rank_safe, get_world_size_safe +from automodel.utils.config_utils import ConfigContainer as Container +from automodel.utils.import_utils import safe_import logger = logging.getLogger(__name__) diff --git a/nemo_lm/automodel/training/finetune.py b/automodel/training/finetune.py similarity index 98% rename from nemo_lm/automodel/training/finetune.py rename to automodel/training/finetune.py index d54fd1e22f..806180904b 100644 --- a/nemo_lm/automodel/training/finetune.py +++ b/automodel/training/finetune.py @@ -23,29 +23,29 @@ from torch.nn.parallel import DistributedDataParallel from transformers import AutoTokenizer -from nemo_lm.automodel.checkpointing import ( +from automodel.checkpointing import ( checkpoint_and_decide_exit, checkpoint_exists, load_checkpoint, save_checkpoint_and_time, ) -from nemo_lm.automodel.components.state import GlobalState -from nemo_lm.automodel.config import ConfigContainer -from nemo_lm.automodel.utils.distributed_utils import initialize_automodel -from nemo_lm.automodel.utils.train_utils import ( +from automodel.components.state import GlobalState +from automodel.config import ConfigContainer +from automodel.utils.distributed_utils import initialize_automodel +from automodel.utils.train_utils import ( eval_log, reduce_loss, training_log, ) from nemo_lm.config.common import ProfilingConfig -from nemo_lm.automodel.utils.common_utils import ( +from automodel.utils.common_utils import ( append_to_progress_log, barrier_and_log, get_rank_safe, get_world_size_safe, print_rank_0, ) -from nemo_lm.automodel.utils.log_utils import setup_logging +from automodel.utils.log_utils import setup_logging logger = logging.getLogger(__name__) diff --git a/nemo_lm/automodel/training/init_utils.py b/automodel/training/init_utils.py similarity index 97% rename from nemo_lm/automodel/training/init_utils.py rename to automodel/training/init_utils.py index 026c89e1b2..8005b026d2 100644 --- a/nemo_lm/automodel/training/init_utils.py +++ b/automodel/training/init_utils.py @@ -18,8 +18,9 @@ import torch import torch.distributed +from dataclasses import dataclass -from nemo_lm.automodel.utils.dist_utils import ( +from automodel.utils.dist_utils import ( get_local_rank_preinit, get_rank_safe, get_world_size_safe diff --git a/nemo_lm/automodel/training/model_utils.py b/automodel/training/model_utils.py similarity index 99% rename from nemo_lm/automodel/training/model_utils.py rename to automodel/training/model_utils.py index c1767e3be2..1b144450bb 100644 --- a/nemo_lm/automodel/training/model_utils.py +++ b/automodel/training/model_utils.py @@ -20,7 +20,7 @@ import torch from torch.nn.parallel import DistributedDataParallel -from nemo_lm.automodel.utils.import_utils import safe_import_from +from automodel.utils.import_utils import safe_import_from te, HAVE_TE = safe_import_from("transformer_engine", "pytorch") diff --git a/nemo_lm/automodel/training/rng.py b/automodel/training/rng.py similarity index 96% rename from nemo_lm/automodel/training/rng.py rename to automodel/training/rng.py index 727498d356..ae811c3f12 100644 --- a/nemo_lm/automodel/training/rng.py +++ b/automodel/training/rng.py @@ -16,7 +16,7 @@ import numpy as np import torch -from nemo_lm.automodel.utils.dist_utils import get_rank_safe +from automodel.utils.dist_utils import get_rank_safe class StatefulRNG: def __init__(self, seed: int, ranked: bool = False): diff --git a/nemo_lm/automodel/training/state.py b/automodel/training/state.py similarity index 97% rename from nemo_lm/automodel/training/state.py rename to automodel/training/state.py index 6fa9c2825b..f70d400427 100644 --- a/nemo_lm/automodel/training/state.py +++ b/automodel/training/state.py @@ -22,10 +22,10 @@ from torch.distributed.checkpoint.stateful import Stateful from torch.utils.tensorboard.writer import SummaryWriter -from nemo_lm.automodel.components.timers import Timers -from nemo_lm.automodel.config import ConfigContainer -from nemo_lm.automodel.utils.common_utils import dump_dataclass_to_yaml, get_rank_safe, get_world_size_safe -from nemo_lm.automodel.utils.sig_utils import DistributedSignalHandler +from automodel.components.timers import Timers +from automodel.config import ConfigContainer +from automodel.utils.common_utils import dump_dataclass_to_yaml, get_rank_safe, get_world_size_safe +from automodel.utils.sig_utils import DistributedSignalHandler @dataclass diff --git a/nemo_lm/automodel/training/timers.py b/automodel/training/timers.py similarity index 99% rename from nemo_lm/automodel/training/timers.py rename to automodel/training/timers.py index 0021d90575..48215744a4 100644 --- a/nemo_lm/automodel/training/timers.py +++ b/automodel/training/timers.py @@ -8,7 +8,7 @@ import torch -from nemo_lm.automodel.utils.import_utils import is_torch_min_version +from automodel.utils.import_utils import is_torch_min_version if is_torch_min_version("1.13.0"): dist_all_gather_func = torch.distributed.all_gather_into_tensor diff --git a/nemo_lm/automodel/training/train_utils.py b/automodel/training/train_utils.py similarity index 98% rename from nemo_lm/automodel/training/train_utils.py rename to automodel/training/train_utils.py index e0a3c4ee09..dd7be49b49 100644 --- a/nemo_lm/automodel/training/train_utils.py +++ b/automodel/training/train_utils.py @@ -18,9 +18,9 @@ import torch -from nemo_lm.automodel.components.state import GlobalState -from nemo_lm.automodel.config import ConfigContainer -from nemo_lm.automodel.utils.common_utils import ( +from automodel.components.state import GlobalState +from automodel.config import ConfigContainer +from automodel.utils.common_utils import ( get_world_size_safe, is_last_rank, print_rank_last, diff --git a/nemo_lm/automodel/training/__init__.py b/automodel/utils/__init__.py similarity index 100% rename from nemo_lm/automodel/training/__init__.py rename to automodel/utils/__init__.py diff --git a/nemo_lm/automodel/utils/config_utils.py b/automodel/utils/config_utils.py similarity index 97% rename from nemo_lm/automodel/utils/config_utils.py rename to automodel/utils/config_utils.py index 1a4391290d..f3bf69b518 100644 --- a/nemo_lm/automodel/utils/config_utils.py +++ b/automodel/utils/config_utils.py @@ -21,8 +21,8 @@ import yaml from omegaconf import OmegaConf -from nemo_lm.automodel.utils.instantiate_utils import InstantiationMode, instantiate -from nemo_lm.automodel.utils.yaml_utils import safe_yaml_representers +from automodel.utils.instantiate_utils import InstantiationMode, instantiate +from automodel.utils.yaml_utils import safe_yaml_representers T = TypeVar("T", bound="ConfigContainer") diff --git a/nemo_lm/automodel/utils/dist_utils.py b/automodel/utils/dist_utils.py similarity index 99% rename from nemo_lm/automodel/utils/dist_utils.py rename to automodel/utils/dist_utils.py index 3fe36ef4c5..2581a9f9e2 100644 --- a/nemo_lm/automodel/utils/dist_utils.py +++ b/automodel/utils/dist_utils.py @@ -109,7 +109,7 @@ def _try_bootstrap_pg(self) -> bool: import torch.distributed import yaml -from nemo_lm.automodel.utils.yaml_utils import safe_yaml_representers +from automodel.utils.yaml_utils import safe_yaml_representers def get_rank_safe() -> int: diff --git a/nemo_lm/automodel/utils/import_utils.py b/automodel/utils/import_utils.py similarity index 100% rename from nemo_lm/automodel/utils/import_utils.py rename to automodel/utils/import_utils.py diff --git a/nemo_lm/automodel/utils/sig_utils.py b/automodel/utils/sig_utils.py similarity index 98% rename from nemo_lm/automodel/utils/sig_utils.py rename to automodel/utils/sig_utils.py index 3f70c52f6f..d6d4a78be1 100644 --- a/nemo_lm/automodel/utils/sig_utils.py +++ b/automodel/utils/sig_utils.py @@ -18,7 +18,7 @@ import torch import torch.distributed -from nemo_lm.automodel.utils.common_utils import get_world_size_safe, print_rank_0 +from automodel.utils.common_utils import get_world_size_safe, print_rank_0 def get_device(local_rank: Optional[int] = None) -> torch.device: diff --git a/nemo_lm/automodel/utils/yaml_utils.py b/automodel/utils/yaml_utils.py similarity index 100% rename from nemo_lm/automodel/utils/yaml_utils.py rename to automodel/utils/yaml_utils.py diff --git a/nemo_lm/automodel/utils/__init__.py b/nemo_lm/automodel/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/recipes/automodel_finetune.py b/recipes/finetune.py similarity index 97% rename from recipes/automodel_finetune.py rename to recipes/finetune.py index 0cac5c47e9..cb594fa9c3 100644 --- a/recipes/automodel_finetune.py +++ b/recipes/finetune.py @@ -6,9 +6,9 @@ import torch.nn as nn from torch.utils.data import DataLoader -from nemo_lm.automodel.config.loader import load_yaml_config -from nemo_lm.automodel.training.init_utils import initialize_distributed -from nemo_lm.automodel.base_recipe import BaseRecipe +from automodel.config.loader import load_yaml_config +from automodel.training.init_utils import initialize_distributed +from automodel.base_recipe import BaseRecipe # --------------------------- diff --git a/recipes/llama_3_2_1b_hellaswag.yaml b/recipes/llama_3_2_1b_hellaswag.yaml index a22a2ad928..1dd1345e2c 100644 --- a/recipes/llama_3_2_1b_hellaswag.yaml +++ b/recipes/llama_3_2_1b_hellaswag.yaml @@ -1,5 +1,5 @@ training: - _target_: nemo_lm.config.common.TrainingConfig + _target_: config.common.TrainingConfig train_iters: 250 eval_interval: 1000 eval_iters: 4 @@ -10,7 +10,7 @@ distributed: timeout_minutes: 1 rng: - _target_: nemo_lm.automodel.training.rng.StatefulRNG + _target_: automodel.training.rng.StatefulRNG seed: 1111 ranked: true @@ -18,10 +18,10 @@ model: _target_: transformers.AutoModelForCausalLM.from_pretrained pretrained_model_name_or_path: meta-llama/Llama-3.2-1B -loss_fn: nemo_lm.automodel.loss.masked_ce.masked_cross_entropy +loss_fn: automodel.loss.masked_ce.masked_cross_entropy dataset: - _target_: nemo_lm.automodel.datasets.hellaswag.HellaSwag + _target_: automodel.datasets.hellaswag.HellaSwag path_or_dataset: rowan/hellaswag split: train tokenizer: @@ -30,11 +30,11 @@ dataset: dataloader: _target_: torchdata.stateful_dataloader.StatefulDataLoader - collate_fn: nemo_lm.automodel.datasets.utils.default_collater + collate_fn: automodel.datasets.utils.default_collater batch_size: 1 validation_dataset: - _target_: nemo_lm.automodel.datasets.hellaswag.hellaswag + _target_: automodel.datasets.hellaswag.hellaswag path_or_dataset: rowan/hellaswag split: train tokenizer: @@ -53,7 +53,7 @@ optimizer: # min_lr: 1.0e-5 scheduler: - _target_: nemo_lm.automodel.config.SchedulerConfig + _target_: automodel.config.SchedulerConfig start_weight_decay: 0 end_weight_decay: 0 weight_decay_incr_style: constant @@ -64,7 +64,7 @@ scheduler: override_opt_param_scheduler: true logger: - _target_: nemo_lm.config.common.LoggerConfig + _target_: config.common.LoggerConfig wandb_project: nemo_automodel_sft_loop wandb_entity: nvidia wandb_exp_name: nemolm_automodel_Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024_lr_1.0e-5 @@ -81,7 +81,7 @@ logger: - nemo.collections.llm.gpt.data.utils checkpointer: - _target_: nemo_lm.automodel.training.checkpoint.TorchCheckpointer + _target_: automodel.training.checkpoint.TorchCheckpointer # save_interval: 10000 # save: /tmp/nemo_run/checkpoints/automodel/Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024 # load: /tmp/nemo_run/checkpoints/automodel/Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024