From 0f7d222922e122cd83ccb8ab4cd4bc6fd97b8c36 Mon Sep 17 00:00:00 2001
From: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Date: Thu, 22 May 2025 00:37:36 -0700
Subject: [PATCH] restructure: move nemo_lm/automodel to top-level automodel

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
---
 {nemo_lm/automodel => automodel}/__init__.py   |  0
 .../automodel => automodel}/base_recipe.py     |  0
 .../automodel => automodel}/config/loader.py   |  0
 .../datasets/hellaswag.py                      |  2 +-
 .../datasets/hf_dataset.py                     |  2 +-
 .../automodel => automodel}/datasets/utils.py  |  0
 .../automodel => automodel}/loggers/_wandb.py  |  2 +-
 .../loggers/log_utils.py                       |  0
 .../automodel => automodel}/loss/__init__.py   |  0
 .../automodel => automodel}/loss/chunked_ce.py |  0
 .../automodel => automodel}/loss/linear_ce.py  |  2 +-
 .../automodel => automodel}/loss/masked_ce.py  |  0
 .../models/auto_model_for_causal_lm.py         |  8 ++++----
 .../auto_model_for_image_text_to_text.py       |  0
 .../automodel => automodel}/optim/scheduler.py |  2 +-
 {nemo_lm => automodel/training}/__init__.py    |  0
 .../training/checkpoint.py                     | 14 +++++++-------
 .../training/checkpoint_utils.py               |  2 +-
 .../automodel => automodel}/training/config.py | 16 ++++++++--------
 .../training/finetune.py                       | 14 +++++++-------
 .../training/init_utils.py                     |  3 ++-
 .../training/model_utils.py                    |  2 +-
 .../automodel => automodel}/training/rng.py    |  2 +-
 .../automodel => automodel}/training/state.py  |  8 ++++----
 .../automodel => automodel}/training/timers.py |  2 +-
 .../training/train_utils.py                    |  6 +++---
 .../training => automodel/utils}/__init__.py   |  0
 .../utils/config_utils.py                      |  4 ++--
 .../utils/dist_utils.py                        |  2 +-
 .../utils/import_utils.py                      |  0
 .../automodel => automodel}/utils/sig_utils.py |  2 +-
 .../utils/yaml_utils.py                        |  0
 nemo_lm/automodel/utils/__init__.py            |  0
 recipes/{automodel_finetune.py => finetune.py} |  6 +++---
 recipes/llama_3_2_1b_hellaswag.yaml            | 18 +++++++++---------
 35 files changed, 60 insertions(+), 59 deletions(-)
 rename {nemo_lm/automodel => automodel}/__init__.py (100%)
 rename {nemo_lm/automodel => automodel}/base_recipe.py (100%)
 rename {nemo_lm/automodel => automodel}/config/loader.py (100%)
 rename {nemo_lm/automodel => automodel}/datasets/hellaswag.py (91%)
 rename {nemo_lm/automodel => automodel}/datasets/hf_dataset.py (99%)
 rename {nemo_lm/automodel => automodel}/datasets/utils.py (100%)
 rename {nemo_lm/automodel => automodel}/loggers/_wandb.py (98%)
 rename {nemo_lm/automodel => automodel}/loggers/log_utils.py (100%)
 rename {nemo_lm/automodel => automodel}/loss/__init__.py (100%)
 rename {nemo_lm/automodel => automodel}/loss/chunked_ce.py (100%)
 rename {nemo_lm/automodel => automodel}/loss/linear_ce.py (99%)
 rename {nemo_lm/automodel => automodel}/loss/masked_ce.py (100%)
 rename {nemo_lm/automodel => automodel}/models/auto_model_for_causal_lm.py (98%)
 rename {nemo_lm/automodel => automodel}/models/auto_model_for_image_text_to_text.py (100%)
 rename {nemo_lm/automodel => automodel}/optim/scheduler.py (99%)
 rename {nemo_lm => automodel/training}/__init__.py (100%)
 rename {nemo_lm/automodel => automodel}/training/checkpoint.py (97%)
 rename {nemo_lm/automodel => automodel}/training/checkpoint_utils.py (98%)
 rename {nemo_lm/automodel => automodel}/training/config.py (96%)
 rename {nemo_lm/automodel => automodel}/training/finetune.py (98%)
 rename {nemo_lm/automodel => automodel}/training/init_utils.py (97%)
 rename {nemo_lm/automodel => automodel}/training/model_utils.py (99%)
 rename {nemo_lm/automodel => automodel}/training/rng.py (96%)
 rename {nemo_lm/automodel => automodel}/training/state.py (97%)
 rename {nemo_lm/automodel => automodel}/training/timers.py (99%)
 rename {nemo_lm/automodel => automodel}/training/train_utils.py (98%)
 rename {nemo_lm/automodel/training => automodel/utils}/__init__.py (100%)
 rename {nemo_lm/automodel => automodel}/utils/config_utils.py (97%)
 rename {nemo_lm/automodel => automodel}/utils/dist_utils.py (99%)
 rename {nemo_lm/automodel => automodel}/utils/import_utils.py (100%)
 rename {nemo_lm/automodel => automodel}/utils/sig_utils.py (98%)
 rename {nemo_lm/automodel => automodel}/utils/yaml_utils.py (100%)
 delete mode 100644 nemo_lm/automodel/utils/__init__.py
 rename recipes/{automodel_finetune.py => finetune.py} (97%)

diff --git a/nemo_lm/automodel/__init__.py b/automodel/__init__.py
similarity index 100%
rename from nemo_lm/automodel/__init__.py
rename to automodel/__init__.py
diff --git a/nemo_lm/automodel/base_recipe.py b/automodel/base_recipe.py
similarity index 100%
rename from nemo_lm/automodel/base_recipe.py
rename to automodel/base_recipe.py
diff --git a/nemo_lm/automodel/config/loader.py b/automodel/config/loader.py
similarity index 100%
rename from nemo_lm/automodel/config/loader.py
rename to automodel/config/loader.py
diff --git a/nemo_lm/automodel/datasets/hellaswag.py b/automodel/datasets/hellaswag.py
similarity index 91%
rename from nemo_lm/automodel/datasets/hellaswag.py
rename to automodel/datasets/hellaswag.py
index 8ec26396da..6e728f75aa 100644
--- a/nemo_lm/automodel/datasets/hellaswag.py
+++ b/automodel/datasets/hellaswag.py
@@ -4,7 +4,7 @@
 
 import datasets
 from datasets import load_dataset
-from nemo_lm.automodel.datasets.utils import SFTSingleTurnPreprocessor
+from automodel.datasets.utils import SFTSingleTurnPreprocessor
 
 class HellaSwag:
     def __init__(self, path_or_dataset, tokenizer, split):
diff --git a/nemo_lm/automodel/datasets/hf_dataset.py b/automodel/datasets/hf_dataset.py
similarity index 99%
rename from nemo_lm/automodel/datasets/hf_dataset.py
rename to automodel/datasets/hf_dataset.py
index 963d6e9a94..b0209c2899 100644
--- a/nemo_lm/automodel/datasets/hf_dataset.py
+++ b/automodel/datasets/hf_dataset.py
@@ -9,7 +9,7 @@
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
 
-from nemo_lm.automodel.utils.common_utils import log_single_rank
+from automodel.utils.common_utils import log_single_rank
 
 logger = logging.getLogger(__name__)
 
diff --git a/nemo_lm/automodel/datasets/utils.py b/automodel/datasets/utils.py
similarity index 100%
rename from nemo_lm/automodel/datasets/utils.py
rename to automodel/datasets/utils.py
diff --git a/nemo_lm/automodel/loggers/_wandb.py b/automodel/loggers/_wandb.py
similarity index 98%
rename from nemo_lm/automodel/loggers/_wandb.py
rename to automodel/loggers/_wandb.py
index b0523011ab..dbccb3dbc4 100644
--- a/nemo_lm/automodel/loggers/_wandb.py
+++ b/automodel/loggers/_wandb.py
@@ -15,7 +15,7 @@
 from pathlib import Path
 from typing import Any, Optional
 
-from nemo_lm.automodel.utils.common_utils import print_rank_last
+from automodel.utils.common_utils import print_rank_last
 
 
 def on_save_checkpoint_success(
diff --git a/nemo_lm/automodel/loggers/log_utils.py b/automodel/loggers/log_utils.py
similarity index 100%
rename from nemo_lm/automodel/loggers/log_utils.py
rename to automodel/loggers/log_utils.py
diff --git a/nemo_lm/automodel/loss/__init__.py b/automodel/loss/__init__.py
similarity index 100%
rename from nemo_lm/automodel/loss/__init__.py
rename to automodel/loss/__init__.py
diff --git a/nemo_lm/automodel/loss/chunked_ce.py b/automodel/loss/chunked_ce.py
similarity index 100%
rename from nemo_lm/automodel/loss/chunked_ce.py
rename to automodel/loss/chunked_ce.py
diff --git a/nemo_lm/automodel/loss/linear_ce.py b/automodel/loss/linear_ce.py
similarity index 99%
rename from nemo_lm/automodel/loss/linear_ce.py
rename to automodel/loss/linear_ce.py
index 363f5da7dc..7888f56bdb 100644
--- a/nemo_lm/automodel/loss/linear_ce.py
+++ b/automodel/loss/linear_ce.py
@@ -62,7 +62,7 @@
 
 import torch
 
-from nemo_lm.automodel.utils.import_utils import safe_import_from
+from automodel.utils.import_utils import safe_import_from
 
 linear_cross_entropy, HAVE_LINEAR_LOSS_CE = safe_import_from(
     "cut_cross_entropy",
diff --git a/nemo_lm/automodel/loss/masked_ce.py b/automodel/loss/masked_ce.py
similarity index 100%
rename from nemo_lm/automodel/loss/masked_ce.py
rename to automodel/loss/masked_ce.py
diff --git a/nemo_lm/automodel/models/auto_model_for_causal_lm.py b/automodel/models/auto_model_for_causal_lm.py
similarity index 98%
rename from nemo_lm/automodel/models/auto_model_for_causal_lm.py
rename to automodel/models/auto_model_for_causal_lm.py
index 131f30fb3f..afbbb92f85 100644
--- a/nemo_lm/automodel/models/auto_model_for_causal_lm.py
+++ b/automodel/models/auto_model_for_causal_lm.py
@@ -20,11 +20,11 @@
 import torch.distributed as dist
 from transformers import AutoModelForCausalLM, BitsAndBytesConfig
 
-from nemo_lm.automodel.utils.dist_utils import FirstRankPerNode
-from nemo_lm.automodel.loss import masked_cross_entropy
-from nemo_lm.automodel.loss.linear_ce import HAVE_LINEAR_LOSS_CE, fused_linear_cross_entropy
+from automodel.utils.dist_utils import FirstRankPerNode
+from automodel.loss import masked_cross_entropy
+from automodel.loss.linear_ce import HAVE_LINEAR_LOSS_CE, fused_linear_cross_entropy
 # from nemo.utils import logging
-from nemo_lm.automodel.utils.import_utils import safe_import
+from automodel.utils.import_utils import safe_import
 
 
 @torch.no_grad()
diff --git a/nemo_lm/automodel/models/auto_model_for_image_text_to_text.py b/automodel/models/auto_model_for_image_text_to_text.py
similarity index 100%
rename from nemo_lm/automodel/models/auto_model_for_image_text_to_text.py
rename to automodel/models/auto_model_for_image_text_to_text.py
diff --git a/nemo_lm/automodel/optim/scheduler.py b/automodel/optim/scheduler.py
similarity index 99%
rename from nemo_lm/automodel/optim/scheduler.py
rename to automodel/optim/scheduler.py
index 610cd9dad1..df142feb3d 100644
--- a/nemo_lm/automodel/optim/scheduler.py
+++ b/automodel/optim/scheduler.py
@@ -8,7 +8,7 @@
 
 from torch.optim.optimizer import Optimizer
 
-from nemo_lm.automodel.utils.common_utils import log_single_rank
+from automodel.utils.common_utils import log_single_rank
 
 logger = logging.getLogger(__name__)
 
diff --git a/nemo_lm/__init__.py b/automodel/training/__init__.py
similarity index 100%
rename from nemo_lm/__init__.py
rename to automodel/training/__init__.py
diff --git a/nemo_lm/automodel/training/checkpoint.py b/automodel/training/checkpoint.py
similarity index 97%
rename from nemo_lm/automodel/training/checkpoint.py
rename to automodel/training/checkpoint.py
index 8581221c73..c5d25309fb 100644
--- a/nemo_lm/automodel/training/checkpoint.py
+++ b/automodel/training/checkpoint.py
@@ -24,11 +24,11 @@
 import torch
 from torch.nn import Module
 
-# from nemo_lm.automodel.components.state import GlobalState, TrainState
-# from nemo_lm.automodel.config import ConfigContainer
-# from nemo_lm.automodel.utils.model_utils import unwrap_model
-# from nemo_lm.automodel.utils import wandb_utils
-from nemo_lm.automodel.training.checkpoint_utils import (
+# from automodel.components.state import GlobalState, TrainState
+# from automodel.config import ConfigContainer
+# from automodel.utils.model_utils import unwrap_model
+# from automodel.utils import wandb_utils
+from automodel.training.checkpoint_utils import (
     TRACKER_PREFIX,
     checkpoint_exists,
     get_checkpoint_run_config_filename,
@@ -36,8 +36,8 @@
     read_run_config,
     read_train_state,
 )
-# from nemo_lm.automodel.utils.checkpoint_utils import TRAIN_STATE_FILE
-from nemo_lm.automodel.utils.dist_utils import (
+# from automodel.utils.checkpoint_utils import TRAIN_STATE_FILE
+from automodel.utils.dist_utils import (
     get_local_rank_preinit,
     get_rank_safe,
     get_world_size_safe
diff --git a/nemo_lm/automodel/training/checkpoint_utils.py b/automodel/training/checkpoint_utils.py
similarity index 98%
rename from nemo_lm/automodel/training/checkpoint_utils.py
rename to automodel/training/checkpoint_utils.py
index 8bf5c9e4e4..76cca849ef 100644
--- a/nemo_lm/automodel/training/checkpoint_utils.py
+++ b/automodel/training/checkpoint_utils.py
@@ -7,7 +7,7 @@
 import torch
 import yaml
 
-from nemo_lm.automodel.utils.dist_utils import (
+from automodel.utils.dist_utils import (
     get_local_rank_preinit,
     get_rank_safe,
     get_world_size_safe
diff --git a/nemo_lm/automodel/training/config.py b/automodel/training/config.py
similarity index 96%
rename from nemo_lm/automodel/training/config.py
rename to automodel/training/config.py
index e097f7c196..b4eca0e68f 100644
--- a/nemo_lm/automodel/training/config.py
+++ b/automodel/training/config.py
@@ -22,11 +22,11 @@
 from torch.nn.parallel import DistributedDataParallel
 from transformers import AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig
 
-from nemo_lm.automodel.components.data.hf_dataset import HFDatasetBuilder
-from nemo_lm.automodel.components.loss.linear_ce import HAVE_LINEAR_LOSS_CE
-from nemo_lm.automodel.components.loss.masked_ce import masked_cross_entropy
-from nemo_lm.automodel.components.scheduler import OptimizerParamScheduler
-from nemo_lm.automodel.utils.model_utils import JitConfig, TEConfig, jit_compile_model, te_accelerate
+from automodel.components.data.hf_dataset import HFDatasetBuilder
+from automodel.components.loss.linear_ce import HAVE_LINEAR_LOSS_CE
+from automodel.components.loss.masked_ce import masked_cross_entropy
+from automodel.components.scheduler import OptimizerParamScheduler
+from automodel.utils.model_utils import JitConfig, TEConfig, jit_compile_model, te_accelerate
 from nemo_lm.config.common import (
     DistributedInitConfig,
     LoggerConfig,
@@ -34,9 +34,9 @@
     RNGConfig,
     TrainingConfig,
 )
-from nemo_lm.automodel.utils.common_utils import get_rank_safe, get_world_size_safe
-from nemo_lm.automodel.utils.config_utils import ConfigContainer as Container
-from nemo_lm.automodel.utils.import_utils import safe_import
+from automodel.utils.common_utils import get_rank_safe, get_world_size_safe
+from automodel.utils.config_utils import ConfigContainer as Container
+from automodel.utils.import_utils import safe_import
 
 logger = logging.getLogger(__name__)
 
diff --git a/nemo_lm/automodel/training/finetune.py b/automodel/training/finetune.py
similarity index 98%
rename from nemo_lm/automodel/training/finetune.py
rename to automodel/training/finetune.py
index d54fd1e22f..806180904b 100644
--- a/nemo_lm/automodel/training/finetune.py
+++ b/automodel/training/finetune.py
@@ -23,29 +23,29 @@
 from torch.nn.parallel import DistributedDataParallel
 from transformers import AutoTokenizer
 
-from nemo_lm.automodel.checkpointing import (
+from automodel.checkpointing import (
     checkpoint_and_decide_exit,
     checkpoint_exists,
     load_checkpoint,
     save_checkpoint_and_time,
 )
-from nemo_lm.automodel.components.state import GlobalState
-from nemo_lm.automodel.config import ConfigContainer
-from nemo_lm.automodel.utils.distributed_utils import initialize_automodel
-from nemo_lm.automodel.utils.train_utils import (
+from automodel.components.state import GlobalState
+from automodel.config import ConfigContainer
+from automodel.utils.distributed_utils import initialize_automodel
+from automodel.utils.train_utils import (
     eval_log,
     reduce_loss,
     training_log,
 )
 from nemo_lm.config.common import ProfilingConfig
-from nemo_lm.automodel.utils.common_utils import (
+from automodel.utils.common_utils import (
     append_to_progress_log,
     barrier_and_log,
     get_rank_safe,
     get_world_size_safe,
     print_rank_0,
 )
-from nemo_lm.automodel.utils.log_utils import setup_logging
+from automodel.utils.log_utils import setup_logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/nemo_lm/automodel/training/init_utils.py b/automodel/training/init_utils.py
similarity index 97%
rename from nemo_lm/automodel/training/init_utils.py
rename to automodel/training/init_utils.py
index 026c89e1b2..8005b026d2 100644
--- a/nemo_lm/automodel/training/init_utils.py
+++ b/automodel/training/init_utils.py
@@ -18,8 +18,9 @@
 
 import torch
 import torch.distributed
+from dataclasses import dataclass
 
-from nemo_lm.automodel.utils.dist_utils import (
+from automodel.utils.dist_utils import (
     get_local_rank_preinit,
     get_rank_safe,
     get_world_size_safe
diff --git a/nemo_lm/automodel/training/model_utils.py b/automodel/training/model_utils.py
similarity index 99%
rename from nemo_lm/automodel/training/model_utils.py
rename to automodel/training/model_utils.py
index c1767e3be2..1b144450bb 100644
--- a/nemo_lm/automodel/training/model_utils.py
+++ b/automodel/training/model_utils.py
@@ -20,7 +20,7 @@
 import torch
 from torch.nn.parallel import DistributedDataParallel
 
-from nemo_lm.automodel.utils.import_utils import safe_import_from
+from automodel.utils.import_utils import safe_import_from
 
 te, HAVE_TE = safe_import_from("transformer_engine", "pytorch")
 
diff --git a/nemo_lm/automodel/training/rng.py b/automodel/training/rng.py
similarity index 96%
rename from nemo_lm/automodel/training/rng.py
rename to automodel/training/rng.py
index 727498d356..ae811c3f12 100644
--- a/nemo_lm/automodel/training/rng.py
+++ b/automodel/training/rng.py
@@ -16,7 +16,7 @@
 import numpy as np
 import torch
 
-from nemo_lm.automodel.utils.dist_utils import get_rank_safe
+from automodel.utils.dist_utils import get_rank_safe
 
 class StatefulRNG:
     def __init__(self, seed: int, ranked: bool = False):
diff --git a/nemo_lm/automodel/training/state.py b/automodel/training/state.py
similarity index 97%
rename from nemo_lm/automodel/training/state.py
rename to automodel/training/state.py
index 6fa9c2825b..f70d400427 100644
--- a/nemo_lm/automodel/training/state.py
+++ b/automodel/training/state.py
@@ -22,10 +22,10 @@
 from torch.distributed.checkpoint.stateful import Stateful
 from torch.utils.tensorboard.writer import SummaryWriter
 
-from nemo_lm.automodel.components.timers import Timers
-from nemo_lm.automodel.config import ConfigContainer
-from nemo_lm.automodel.utils.common_utils import dump_dataclass_to_yaml, get_rank_safe, get_world_size_safe
-from nemo_lm.automodel.utils.sig_utils import DistributedSignalHandler
+from automodel.components.timers import Timers
+from automodel.config import ConfigContainer
+from automodel.utils.common_utils import dump_dataclass_to_yaml, get_rank_safe, get_world_size_safe
+from automodel.utils.sig_utils import DistributedSignalHandler
 
 
 @dataclass
diff --git a/nemo_lm/automodel/training/timers.py b/automodel/training/timers.py
similarity index 99%
rename from nemo_lm/automodel/training/timers.py
rename to automodel/training/timers.py
index 0021d90575..48215744a4 100644
--- a/nemo_lm/automodel/training/timers.py
+++ b/automodel/training/timers.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from nemo_lm.automodel.utils.import_utils import is_torch_min_version
+from automodel.utils.import_utils import is_torch_min_version
 
 if is_torch_min_version("1.13.0"):
     dist_all_gather_func = torch.distributed.all_gather_into_tensor
diff --git a/nemo_lm/automodel/training/train_utils.py b/automodel/training/train_utils.py
similarity index 98%
rename from nemo_lm/automodel/training/train_utils.py
rename to automodel/training/train_utils.py
index e0a3c4ee09..dd7be49b49 100644
--- a/nemo_lm/automodel/training/train_utils.py
+++ b/automodel/training/train_utils.py
@@ -18,9 +18,9 @@
 
 import torch
 
-from nemo_lm.automodel.components.state import GlobalState
-from nemo_lm.automodel.config import ConfigContainer
-from nemo_lm.automodel.utils.common_utils import (
+from automodel.components.state import GlobalState
+from automodel.config import ConfigContainer
+from automodel.utils.common_utils import (
     get_world_size_safe,
     is_last_rank,
     print_rank_last,
diff --git a/nemo_lm/automodel/training/__init__.py b/automodel/utils/__init__.py
similarity index 100%
rename from nemo_lm/automodel/training/__init__.py
rename to automodel/utils/__init__.py
diff --git a/nemo_lm/automodel/utils/config_utils.py b/automodel/utils/config_utils.py
similarity index 97%
rename from nemo_lm/automodel/utils/config_utils.py
rename to automodel/utils/config_utils.py
index 1a4391290d..f3bf69b518 100644
--- a/nemo_lm/automodel/utils/config_utils.py
+++ b/automodel/utils/config_utils.py
@@ -21,8 +21,8 @@
 import yaml
 from omegaconf import OmegaConf
 
-from nemo_lm.automodel.utils.instantiate_utils import InstantiationMode, instantiate
-from nemo_lm.automodel.utils.yaml_utils import safe_yaml_representers
+from automodel.utils.instantiate_utils import InstantiationMode, instantiate
+from automodel.utils.yaml_utils import safe_yaml_representers
 
 T = TypeVar("T", bound="ConfigContainer")
 
diff --git a/nemo_lm/automodel/utils/dist_utils.py b/automodel/utils/dist_utils.py
similarity index 99%
rename from nemo_lm/automodel/utils/dist_utils.py
rename to automodel/utils/dist_utils.py
index 3fe36ef4c5..2581a9f9e2 100644
--- a/nemo_lm/automodel/utils/dist_utils.py
+++ b/automodel/utils/dist_utils.py
@@ -109,7 +109,7 @@ def _try_bootstrap_pg(self) -> bool:
 import torch.distributed
 import yaml
 
-from nemo_lm.automodel.utils.yaml_utils import safe_yaml_representers
+from automodel.utils.yaml_utils import safe_yaml_representers
 
 
 def get_rank_safe() -> int:
diff --git a/nemo_lm/automodel/utils/import_utils.py b/automodel/utils/import_utils.py
similarity index 100%
rename from nemo_lm/automodel/utils/import_utils.py
rename to automodel/utils/import_utils.py
diff --git a/nemo_lm/automodel/utils/sig_utils.py b/automodel/utils/sig_utils.py
similarity index 98%
rename from nemo_lm/automodel/utils/sig_utils.py
rename to automodel/utils/sig_utils.py
index 3f70c52f6f..d6d4a78be1 100644
--- a/nemo_lm/automodel/utils/sig_utils.py
+++ b/automodel/utils/sig_utils.py
@@ -18,7 +18,7 @@
 import torch
 import torch.distributed
 
-from nemo_lm.automodel.utils.common_utils import get_world_size_safe, print_rank_0
+from automodel.utils.common_utils import get_world_size_safe, print_rank_0
 
 
 def get_device(local_rank: Optional[int] = None) -> torch.device:
diff --git a/nemo_lm/automodel/utils/yaml_utils.py b/automodel/utils/yaml_utils.py
similarity index 100%
rename from nemo_lm/automodel/utils/yaml_utils.py
rename to automodel/utils/yaml_utils.py
diff --git a/nemo_lm/automodel/utils/__init__.py b/nemo_lm/automodel/utils/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/recipes/automodel_finetune.py b/recipes/finetune.py
similarity index 97%
rename from recipes/automodel_finetune.py
rename to recipes/finetune.py
index 0cac5c47e9..cb594fa9c3 100644
--- a/recipes/automodel_finetune.py
+++ b/recipes/finetune.py
@@ -6,9 +6,9 @@
 import torch.nn as nn
 from torch.utils.data import DataLoader
 
-from nemo_lm.automodel.config.loader import load_yaml_config
-from nemo_lm.automodel.training.init_utils import initialize_distributed
-from nemo_lm.automodel.base_recipe import BaseRecipe
+from automodel.config.loader import load_yaml_config
+from automodel.training.init_utils import initialize_distributed
+from automodel.base_recipe import BaseRecipe
 
 
 # ---------------------------
diff --git a/recipes/llama_3_2_1b_hellaswag.yaml b/recipes/llama_3_2_1b_hellaswag.yaml
index a22a2ad928..1dd1345e2c 100644
--- a/recipes/llama_3_2_1b_hellaswag.yaml
+++ b/recipes/llama_3_2_1b_hellaswag.yaml
@@ -1,5 +1,5 @@
 training:
-  _target_: nemo_lm.config.common.TrainingConfig
+  _target_: config.common.TrainingConfig
   train_iters: 250
   eval_interval: 1000
   eval_iters: 4
@@ -10,7 +10,7 @@ distributed:
   timeout_minutes: 1
 
 rng:
-  _target_: nemo_lm.automodel.training.rng.StatefulRNG
+  _target_: automodel.training.rng.StatefulRNG
   seed: 1111
   ranked: true
 
@@ -18,10 +18,10 @@ model:
   _target_: transformers.AutoModelForCausalLM.from_pretrained
   pretrained_model_name_or_path: meta-llama/Llama-3.2-1B
 
-loss_fn: nemo_lm.automodel.loss.masked_ce.masked_cross_entropy
+loss_fn: automodel.loss.masked_ce.masked_cross_entropy
 
 dataset:
-  _target_: nemo_lm.automodel.datasets.hellaswag.HellaSwag
+  _target_: automodel.datasets.hellaswag.HellaSwag
   path_or_dataset: rowan/hellaswag
   split: train
   tokenizer:
@@ -30,11 +30,11 @@ dataset:
 
 dataloader:
   _target_: torchdata.stateful_dataloader.StatefulDataLoader
-  collate_fn: nemo_lm.automodel.datasets.utils.default_collater
+  collate_fn: automodel.datasets.utils.default_collater
   batch_size: 1
 
 validation_dataset:
-  _target_: nemo_lm.automodel.datasets.hellaswag.hellaswag
+  _target_: automodel.datasets.hellaswag.hellaswag
   path_or_dataset: rowan/hellaswag
   split: train
   tokenizer:
@@ -53,7 +53,7 @@ optimizer:
   # min_lr: 1.0e-5
 
 scheduler:
-  _target_: nemo_lm.automodel.config.SchedulerConfig
+  _target_: automodel.config.SchedulerConfig
   start_weight_decay: 0
   end_weight_decay: 0
   weight_decay_incr_style: constant
@@ -64,7 +64,7 @@ scheduler:
   override_opt_param_scheduler: true
 
 logger:
-  _target_: nemo_lm.config.common.LoggerConfig
+  _target_: config.common.LoggerConfig
   wandb_project: nemo_automodel_sft_loop
   wandb_entity: nvidia
   wandb_exp_name: nemolm_automodel_Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024_lr_1.0e-5
@@ -81,7 +81,7 @@ logger:
     - nemo.collections.llm.gpt.data.utils
 
 checkpointer:
-  _target_: nemo_lm.automodel.training.checkpoint.TorchCheckpointer
+  _target_: automodel.training.checkpoint.TorchCheckpointer
   # save_interval: 10000
   # save: /tmp/nemo_run/checkpoints/automodel/Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024
   # load: /tmp/nemo_run/checkpoints/automodel/Rowan_hellaswag_meta-llama_Llama-3.2-1B_gbs_256_seq_len_1024