From c3d819cc8bfcb97f8909c36196ec01721d460d19 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Mon, 24 Nov 2025 17:34:41 +0000 Subject: [PATCH 01/14] Adding optimizer registry and its test cases Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/optimizer.py | 22 +++++++ .../experimental/tests/test_optimizer.py | 62 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 QEfficient/finetune/experimental/tests/test_optimizer.py diff --git a/QEfficient/finetune/experimental/core/optimizer.py b/QEfficient/finetune/experimental/core/optimizer.py index d647b73a6..2304a1525 100644 --- a/QEfficient/finetune/experimental/core/optimizer.py +++ b/QEfficient/finetune/experimental/core/optimizer.py @@ -4,3 +4,25 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- + +""" +Optimizer components for the training system. +""" + +from typing import Type + +import torch.optim as optim +from torch.optim import Optimizer + +from QEfficient.finetune.experimental.core.component_registry import registry + +registry.optimizer("adam")(optim.Adam) +registry.optimizer("adamw")(optim.AdamW) +registry.optimizer("sgd")(optim.SGD) + + +def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: + optimizer_cls = registry.get_optimizer(optimizer_name) + if optimizer_cls is None: + raise ValueError(f"Unknown optimizer: {optimizer_name}") + return optimizer_cls diff --git a/QEfficient/finetune/experimental/tests/test_optimizer.py b/QEfficient/finetune/experimental/tests/test_optimizer.py new file mode 100644 index 000000000..b1f6f82be --- /dev/null +++ b/QEfficient/finetune/experimental/tests/test_optimizer.py @@ -0,0 +1,62 @@ +# ----------------------------------------------------------------------------- +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
+# SPDX-License-Identifier: BSD-3-Clause +# +# ----------------------------------------------------------------------------- + +import sys +from pathlib import Path + +import pytest +import torch.optim as optim + +from QEfficient import QEFFAutoModelForCausalLM +from QEfficient.finetune.experimental.core.component_registry import ComponentFactory + +sys.path.insert(0, str(Path(__file__).parent.parent)) +OPTIMIZER_CONFIGS = { + "adam": { + "name": "adam", + "opt_cls": optim.Adam, + "lr": 1e-4, + "weight_decay": 0.01, + "betas": (0.9, 0.999), + "eps": 1e-8, + "amsgrad": False, + }, + "adamw": { + "name": "adamw", + "opt_cls": optim.AdamW, + "lr": 1e-4, + "weight_decay": 0.01, + "betas": (0.9, 0.999), + "eps": 1e-8, + "amsgrad": False, + }, + "sgd": { + "name": "sgd", + "opt_cls": optim.SGD, + "lr": 1e-4, + "momentum": 0.9, + "weight_decay": 0.01, + "dampening": 0.0, + "nesterov": False, + }, +} + + +@pytest.fixture +def ref_model(): + return QEFFAutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B") + + +@pytest.mark.parametrize("opt_name", OPTIMIZER_CONFIGS.keys()) +def test_optimizers(opt_name, ref_model): + """Test that all optimizers can be created with their configs.""" + # Create optimizer using the factory + config = OPTIMIZER_CONFIGS[opt_name] + opt_inst = ComponentFactory.create_optimizer(**config, model_params=ref_model.model.parameters()) + assert opt_inst is not None + assert isinstance(opt_inst, optim.Optimizer) + assert len(list(opt_inst.param_groups)) == 1 From 9e402bcfbaa69280d598eb95aa129dc09209a232 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Thu, 27 Nov 2025 07:00:25 +0000 Subject: [PATCH 02/14] Adding optimizer registry and its test cases Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/optimizer.py | 12 +++-- .../experimental/tests/test_optimizer.py | 47 ++++++++++++++----- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/QEfficient/finetune/experimental/core/optimizer.py b/QEfficient/finetune/experimental/core/optimizer.py index 2304a1525..64c110ec5 100644 --- a/QEfficient/finetune/experimental/core/optimizer.py +++ b/QEfficient/finetune/experimental/core/optimizer.py @@ -16,9 +16,15 @@ from QEfficient.finetune.experimental.core.component_registry import registry -registry.optimizer("adam")(optim.Adam) -registry.optimizer("adamw")(optim.AdamW) -registry.optimizer("sgd")(optim.SGD) + +def register_optimizer(optimizer_name: str, cls: Type[Optimizer]) -> None: + """Register a new optimizer class.""" + registry.optimizer(optimizer_name)(cls) + + +register_optimizer("adam", optim.Adam) +register_optimizer("adamw", optim.AdamW) +register_optimizer("sgd", optim.SGD) def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: diff --git a/QEfficient/finetune/experimental/tests/test_optimizer.py b/QEfficient/finetune/experimental/tests/test_optimizer.py index b1f6f82be..d84a2a524 100644 --- a/QEfficient/finetune/experimental/tests/test_optimizer.py +++ b/QEfficient/finetune/experimental/tests/test_optimizer.py @@ -5,19 +5,20 @@ # # ----------------------------------------------------------------------------- +import inspect import sys from pathlib import Path import pytest +import torch.nn as nn import torch.optim as optim -from QEfficient import QEFFAutoModelForCausalLM -from QEfficient.finetune.experimental.core.component_registry import ComponentFactory +from QEfficient.finetune.experimental.core.optimizer import get_optimizer_cls, register_optimizer sys.path.insert(0, str(Path(__file__).parent.parent)) OPTIMIZER_CONFIGS = { 
"adam": { - "name": "adam", + "optimizer_name": "adam", "opt_cls": optim.Adam, "lr": 1e-4, "weight_decay": 0.01, @@ -26,7 +27,7 @@ "amsgrad": False, }, "adamw": { - "name": "adamw", + "optimizer_name": "adamw", "opt_cls": optim.AdamW, "lr": 1e-4, "weight_decay": 0.01, @@ -35,7 +36,7 @@ "amsgrad": False, }, "sgd": { - "name": "sgd", + "optimizer_name": "sgd", "opt_cls": optim.SGD, "lr": 1e-4, "momentum": 0.9, @@ -47,16 +48,40 @@ @pytest.fixture -def ref_model(): - return QEFFAutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B") +def dummy_model(): + return nn.Sequential( + nn.Linear(10, 5), + nn.ReLU(), + nn.Linear(5, 1), + ) @pytest.mark.parametrize("opt_name", OPTIMIZER_CONFIGS.keys()) -def test_optimizers(opt_name, ref_model): +def test_optimizers(opt_name, dummy_model): """Test that all optimizers can be created with their configs.""" - # Create optimizer using the factory + # Register optimizer class config = OPTIMIZER_CONFIGS[opt_name] - opt_inst = ComponentFactory.create_optimizer(**config, model_params=ref_model.model.parameters()) - assert opt_inst is not None + register_optimizer(config["optimizer_name"], config["opt_cls"]) + optimizer_class = get_optimizer_cls(config["optimizer_name"]) + assert optimizer_class is not None + assert optimizer_class == config["opt_cls"] + valid_params = inspect.signature(optimizer_class).parameters + filtered_config = {k: v for k, v in config.items() if k in valid_params} + opt_inst = optimizer_class(dummy_model.parameters(), **filtered_config) assert isinstance(opt_inst, optim.Optimizer) assert len(list(opt_inst.param_groups)) == 1 + assert opt_inst.param_groups[0]["lr"] == config["lr"] + if "weight_decay" in config: + assert opt_inst.param_groups[0]["weight_decay"] == config["weight_decay"] + if "betas" in config: + assert opt_inst.param_groups[0]["betas"] == config["betas"] + if "eps" in config: + assert opt_inst.param_groups[0]["eps"] == config["eps"] + if "momentum" in config: + assert opt_inst.param_groups[0]["momentum"] == config["momentum"] + if "dampening" in config: + assert opt_inst.param_groups[0]["dampening"] == config["dampening"] + if "nesterov" in config: + assert opt_inst.param_groups[0]["nesterov"] == config["nesterov"] + if "amsgrad" in config: + assert opt_inst.param_groups[0]["amsgrad"] == config["amsgrad"] From 34f15c4f96aa97eaa392e6c6bbcc9e275bdcc881 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Fri, 28 Nov 2025 06:00:44 +0000 Subject: [PATCH 03/14] Adding optimizer registry and its test cases Signed-off-by: Tanisha Chawada --- QEfficient/finetune/experimental/core/optimizer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/QEfficient/finetune/experimental/core/optimizer.py b/QEfficient/finetune/experimental/core/optimizer.py index 64c110ec5..de28848ae 100644 --- a/QEfficient/finetune/experimental/core/optimizer.py +++ b/QEfficient/finetune/experimental/core/optimizer.py @@ -32,3 +32,11 @@ def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: if optimizer_cls is None: raise ValueError(f"Unknown optimizer: {optimizer_name}") return optimizer_cls + + +def get_optimizer(opt_config): + opt_name = opt_config.pop("optimizer_name") + opt_cls = get_optimizer_cls(opt_name) + opt_config["lr"] = float(opt_config["lr"]) + optimizer_cls_and_kwargs = (opt_cls, opt_config) + return optimizer_cls_and_kwargs From 1d7d4c2d9255cd39971059fdd35aa8b67dc8bf58 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Tue, 2 Dec 2025 07:19:58 +0000 Subject: [PATCH 04/14] [QEff. 
Finetuning]: Optimizer registry and test case inclusion Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/optimizer.py | 23 +++--- .../experimental/tests/test_optimizer.py | 78 ++++++++++--------- 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/QEfficient/finetune/experimental/core/optimizer.py b/QEfficient/finetune/experimental/core/optimizer.py index de28848ae..2f77ce285 100644 --- a/QEfficient/finetune/experimental/core/optimizer.py +++ b/QEfficient/finetune/experimental/core/optimizer.py @@ -16,18 +16,18 @@ from QEfficient.finetune.experimental.core.component_registry import registry - -def register_optimizer(optimizer_name: str, cls: Type[Optimizer]) -> None: - """Register a new optimizer class.""" - registry.optimizer(optimizer_name)(cls) - - -register_optimizer("adam", optim.Adam) -register_optimizer("adamw", optim.AdamW) -register_optimizer("sgd", optim.SGD) +registry.optimizer("Adam")(optim.Adam) +registry.optimizer("AdamW")(optim.AdamW) +registry.optimizer("SGD")(optim.SGD) def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: + """ + Get optimizer class from registry. + Args: optimizer_name: Name of the optimizer to retrieve. + Returns: Optimizer class. + Raises: ValueError: If optimizer name is not found in registry. + """ optimizer_cls = registry.get_optimizer(optimizer_name) if optimizer_cls is None: raise ValueError(f"Unknown optimizer: {optimizer_name}") @@ -35,6 +35,11 @@ def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: def get_optimizer(opt_config): + """ + Create optimizer from config. + Args: opt_config: Dictionary containing optimizer configuration. + Returns: Tuple of optimizer class and its arguments. + """ opt_name = opt_config.pop("optimizer_name") opt_cls = get_optimizer_cls(opt_name) opt_config["lr"] = float(opt_config["lr"]) diff --git a/QEfficient/finetune/experimental/tests/test_optimizer.py b/QEfficient/finetune/experimental/tests/test_optimizer.py index d84a2a524..d9225f6de 100644 --- a/QEfficient/finetune/experimental/tests/test_optimizer.py +++ b/QEfficient/finetune/experimental/tests/test_optimizer.py @@ -5,20 +5,16 @@ # # ----------------------------------------------------------------------------- -import inspect -import sys -from pathlib import Path - import pytest import torch.nn as nn import torch.optim as optim -from QEfficient.finetune.experimental.core.optimizer import get_optimizer_cls, register_optimizer +from QEfficient.finetune.experimental.core.component_registry import registry +from QEfficient.finetune.experimental.core.optimizer import get_optimizer, get_optimizer_cls -sys.path.insert(0, str(Path(__file__).parent.parent)) OPTIMIZER_CONFIGS = { - "adam": { - "optimizer_name": "adam", + "Adam": { + "optimizer_name": "Adam", "opt_cls": optim.Adam, "lr": 1e-4, "weight_decay": 0.01, @@ -26,8 +22,8 @@ "eps": 1e-8, "amsgrad": False, }, - "adamw": { - "optimizer_name": "adamw", + "AdamW": { + "optimizer_name": "AdamW", "opt_cls": optim.AdamW, "lr": 1e-4, "weight_decay": 0.01, @@ -35,8 +31,8 @@ "eps": 1e-8, "amsgrad": False, }, - "sgd": { - "optimizer_name": "sgd", + "SGD": { + "optimizer_name": "SGD", "opt_cls": optim.SGD, "lr": 1e-4, "momentum": 0.9, @@ -44,6 +40,17 @@ "dampening": 0.0, "nesterov": False, }, + "RMSprop": { + "optimizer_name": "RMSprop", + "opt_cls": optim.RMSprop, + }, +} + +REGISTRY_CONFIG = { + "RMSprop": { + "optimizer_name": "RMSprop", + "opt_cls": optim.RMSprop, + }, } @@ -58,30 +65,29 @@ def dummy_model(): @pytest.mark.parametrize("opt_name", OPTIMIZER_CONFIGS.keys()) 
def test_optimizers(opt_name, dummy_model):
-    """Test that all optimizers can be created with their configs."""
-    # Register optimizer class
+    """Test that all registered optimizers can be created with their configs."""
     config = OPTIMIZER_CONFIGS[opt_name]
-    register_optimizer(config["optimizer_name"], config["opt_cls"])
-    optimizer_class = get_optimizer_cls(config["optimizer_name"])
-    assert optimizer_class is not None
-    assert optimizer_class == config["opt_cls"]
-    valid_params = inspect.signature(optimizer_class).parameters
-    filtered_config = {k: v for k, v in config.items() if k in valid_params}
-    opt_inst = optimizer_class(dummy_model.parameters(), **filtered_config)
+    config.pop("opt_cls")
+    try:
+        optimizer_class_and_kwargs = get_optimizer(config)
+        assert optimizer_class_and_kwargs is not None
+    except ValueError as e:
+        assert "Unknown optimizer" in str(e)
+        return
+    optimizer_class = optimizer_class_and_kwargs[0]
+    opt_inst = optimizer_class(dummy_model.parameters(), **optimizer_class_and_kwargs[1])
     assert isinstance(opt_inst, optim.Optimizer)
     assert len(list(opt_inst.param_groups)) == 1
-    assert opt_inst.param_groups[0]["lr"] == config["lr"]
-    if "weight_decay" in config:
-        assert opt_inst.param_groups[0]["weight_decay"] == config["weight_decay"]
-    if "betas" in config:
-        assert opt_inst.param_groups[0]["betas"] == config["betas"]
-    if "eps" in config:
-        assert opt_inst.param_groups[0]["eps"] == config["eps"]
-    if "momentum" in config:
-        assert opt_inst.param_groups[0]["momentum"] == config["momentum"]
-    if "dampening" in config:
-        assert opt_inst.param_groups[0]["dampening"] == config["dampening"]
-    if "nesterov" in config:
-        assert opt_inst.param_groups[0]["nesterov"] == config["nesterov"]
-    if "amsgrad" in config:
-        assert opt_inst.param_groups[0]["amsgrad"] == config["amsgrad"]
+
+    for key in ["lr", "weight_decay", "betas", "eps", "momentum", "dampening", "nesterov", "amsgrad"]:
+        if key in config:
+            assert opt_inst.param_groups[0][key] == config[key], f"{key} mismatch"
+
+
+@pytest.mark.parametrize("opt_name, opt_config", REGISTRY_CONFIG.items())
+def test_registered_optimizer(opt_name, opt_config):
+    """Test that an optimizer is registered correctly."""
+    registry.optimizer(opt_name)(opt_config["opt_cls"])
+    optimizer_class = get_optimizer_cls(opt_name)
+    assert optimizer_class is not None
+    assert optimizer_class == opt_config["opt_cls"]

From 926a49f7c217b5c5b6ff7034665cd0e6b8c5d01c Mon Sep 17 00:00:00 2001
From: Tanisha Chawada
Date: Wed, 3 Dec 2025 10:01:06 +0000
Subject: [PATCH 05/14] [QEff_Finetuning] Adding callback and its test cases.
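
This change registers the stock Hugging Face callbacks (early stopping,
printer, default flow, tensorboard) with the component registry and adds
QAIC-specific profiler and op-by-op verifier callbacks. As a rough usage
sketch (the callback name and class below are illustrative, not part of a
stable API), a custom callback can be registered and instantiated through
the same registry:

    from transformers.trainer_callback import TrainerCallback

    from QEfficient.finetune.experimental.core.callbacks import create_callbacks
    from QEfficient.finetune.experimental.core.component_registry import registry

    @registry.callback("loss_printer")  # hypothetical callback name
    class LossPrinterCallback(TrainerCallback):
        def on_log(self, args, state, control, logs=None, **kwargs):
            # Print only the loss from each log event.
            if logs and "loss" in logs:
                print(f"step {state.global_step}: loss={logs['loss']}")

    cb = create_callbacks("loss_printer")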
Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/callbacks.py | 182 ++++++++++++++++++ .../experimental/core/utils/profiler_utils.py | 88 +++++++++ .../experimental/tests/test_callback.py | 79 ++++++++ 3 files changed, 349 insertions(+) create mode 100644 QEfficient/finetune/experimental/tests/test_callback.py diff --git a/QEfficient/finetune/experimental/core/callbacks.py b/QEfficient/finetune/experimental/core/callbacks.py index d647b73a6..3267fb2c7 100644 --- a/QEfficient/finetune/experimental/core/callbacks.py +++ b/QEfficient/finetune/experimental/core/callbacks.py @@ -4,3 +4,185 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- + +import json +import os +from typing import Any, Dict, Optional + +from transformers import ( + DefaultFlowCallback, + EarlyStoppingCallback, + PrinterCallback, + ProgressCallback, + TrainingArguments, +) +from transformers.integrations.integration_utils import TensorBoardCallback +from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState + +from QEfficient.finetune.experimental.core.component_registry import registry +from QEfficient.finetune.experimental.core.utils.profiler_utils import ( + get_op_verifier_ctx, + init_qaic_profiling, + stop_qaic_profiling, +) + +registry.callback("early_stopping")(EarlyStoppingCallback) +registry.callback("printer")(PrinterCallback) +registry.callback("default_flow")(DefaultFlowCallback) +registry.callback("tensorboard")(TensorBoardCallback) + + +@registry.callback("enhanced_progressbar") +class EnhancedProgressCallback(ProgressCallback): + """ + A [`TrainerCallback`] that displays the progress of training or evaluation. + You can modify `max_str_len` to control how long strings are truncated when logging. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the callback with optional max_str_len parameter to control string truncation length. + + Args: + max_str_len (`int`): + Maximum length of strings to display in logs. + Longer strings will be truncated with a message. + """ + super().__init__(*args, **kwargs) + + def on_train_begin(self, args, state, control, **kwargs): + super().on_train_begin(args, state, control, **kwargs) + if self.training_bar is not None: + self.training_bar.set_description("Training Progress") + + def on_log(self, args, state, control, logs=None, **kwargs): + if state.is_world_process_zero and self.training_bar is not None: + # make a shallow copy of logs so we can mutate the fields copied + # but avoid doing any value pickling. + shallow_logs = {} + for k, v in logs.items(): + if isinstance(v, str) and len(v) > self.max_str_len: + shallow_logs[k] = ( + f"[String too long to display, length: {len(v)} > {self.max_str_len}. " + "Consider increasing `max_str_len` if needed.]" + ) + else: + shallow_logs[k] = v + _ = shallow_logs.pop("total_flos", None) + # round numbers so that it looks better in console + if "epoch" in shallow_logs: + shallow_logs["epoch"] = round(shallow_logs["epoch"], 2) + + updated_dict = {} + if "epoch" in shallow_logs: + updated_dict["epoch"] = shallow_logs["epoch"] + if "loss" in shallow_logs: + updated_dict["loss"] = shallow_logs["loss"] + if "learning_rate" in shallow_logs: + updated_dict["lr"] = shallow_logs["learning_rate"] + self.training_bar.set_postfix(updated_dict) + + +@registry.callback("json_logger") +class JSONLoggerCallback(TrainerCallback): + """ + A [`TrainerCallback`] that logs training and evaluation metrics to a JSON file. 
+ """ + + def __init__(self, log_path=None, *args, **kwargs): + """ + Initialize the callback with the path to the JSON log file. + + Args: + log_path (`str`): + Path to the jsonl file where logs will be saved. + """ + super().__init__(*args, **kwargs) + if log_path is None: + log_path = os.path.join(os.environ.get("OUTPUT_DIR", "./"), "training_logs.jsonl") + self.log_path = log_path + # Ensure the log file is created and empty + with open(self.log_path, "w") as _: + pass + + def on_log( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + logs: Optional[Dict] = None, + **kwargs, + ): + if logs is None: + return + logs.pop("entropy") + logs.pop("mean_token_accuracy") + if state.global_step: + logs["global_step"] = state.global_step + if logs is not None: + with open(self.log_path, "a") as f: + json_line = json.dumps(logs, separators=(",", ":")) + f.write(json_line + "\n") + + +@registry.callback("qaic_profiler_callback") +class QAICProfilerCallback(TrainerCallback): + def __init__(self, *args, **kwargs): + self.start_step = kwargs.get("start_step", -1) + self.end_step = kwargs.get("end_step", -1) + self.device_ids = kwargs.get("device_ids", [0]) + + def on_step_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + """ + Event called at the beginning of a training step. If using gradient accumulation, one training step might take + several inputs. + """ + if state.global_step == self.start_step: + for device_id in self.device_ids: + init_qaic_profiling(True, f"qaic:{device_id}") + elif state.global_step == self.end_step: + for device_id in self.device_ids: + stop_qaic_profiling(True, f"qaic:{device_id}") + + +@registry.callback("qaic_op_by_op_verifier_callback") +class QAICOpByOpVerifierCallback(TrainerCallback): + def __init__(self, *args, **kwargs): + self.start_step = kwargs.get("start_step", -1) + self.end_step = kwargs.get("end_step", -1) + self.trace_dir = kwargs.get("trace_dir", "qaic_op_by_op_traces") + self.atol = kwargs.get("atol", 1e-1) + self.rtol = kwargs.get("rtol", 1e-5) + + def on_step_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + """ + Event called at the beginning of a training step. If using gradient accumulation, one training step might take + several inputs. + """ + if self.start_step <= state.global_step < self.end_step: + self.op_verifier_ctx_step = get_op_verifier_ctx( + use_op_by_op_verifier=True, + device_type="qaic", + dump_dir=self.trace_dir, + step=state.global_step, + atol=self.atol, + rtol=self.rtol, + ) + self.op_verifier_ctx_step.__enter__() + + def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + """ + Event called at the end of a training step. If using gradient accumulation, one training step might take + several inputs. + """ + if self.start_step <= state.global_step < self.end_step: + if self.op_verifier_ctx_step is not None: + self.op_verifier_ctx_step.__exit__(None, None, None) + + +def create_callbacks(name: str, **kwargs) -> Any: + """Create a callback instance.""" + callback_class = registry.get_callback(name) + if callback_class is None: + raise ValueError(f"Unknown callback: {name}. 
Available: {registry.list_callbacks()}")
+    return callback_class(**kwargs)
diff --git a/QEfficient/finetune/experimental/core/utils/profiler_utils.py b/QEfficient/finetune/experimental/core/utils/profiler_utils.py
index d647b73a6..e24508e83 100644
--- a/QEfficient/finetune/experimental/core/utils/profiler_utils.py
+++ b/QEfficient/finetune/experimental/core/utils/profiler_utils.py
@@ -4,3 +4,91 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
+
+
+from contextlib import nullcontext
+from typing import ContextManager
+
+import torch
+
+
+def get_op_verifier_ctx(
+    use_op_by_op_verifier: bool,
+    device_type: str,
+    dump_dir: str,
+    step: int,
+    ref_device: str = "cpu",
+    ref_dtype: torch.dtype = torch.float32,
+    atol: float = 1e-1,
+    rtol: float = 1e-5,
+    use_ref_output_on_mismatch: bool = True,
+) -> ContextManager:
+    """Get the op-by-op verifier context manager when op-by-op verification is
+    enabled. It helps in debugging operator-related issues by comparing the
+    operator execution on qaic against cpu. This is meant only for the qaic backend.
+
+    Args:
+        use_op_by_op_verifier (bool): Boolean flag to enable op-by-op verifier.
+        device_type (str): Device on which the model is being executed.
+        dump_dir (str): Directory to dump the op-by-op verification results.
+        step (int): Step number for which the op-by-op verification is to be performed.
+        ref_device (str, optional): Device to use as reference for verification.
+            Defaults to "cpu".
+        ref_dtype (torch.dtype, optional): Data type to use as reference
+            datatype for verification. Defaults to torch.float32.
+        atol (float, optional): Absolute tolerance to match the results. Defaults to 1e-1.
+        rtol (float, optional): Relative tolerance to match the results. Defaults to 1e-5.
+        use_ref_output_on_mismatch (bool, optional): If an operator has a
+            mismatch with respect to the reference device, use the reference
+            device outputs and continue the rest of the verification. Defaults to True.
+
+    Returns:
+        ContextManager: Instance of context manager used to verify the operators.
+    """
+    if (not use_op_by_op_verifier) or ("qaic" not in device_type):
+        return nullcontext()
+
+    # Lazily import qaic_debug only when it is actually needed.
+    import torch_qaic.debug as qaic_debug
+
+    filter_config = qaic_debug.DispatchFilterConfig.default(device_type)
+    dump_dir = dump_dir + "/mismatches/step_" + str(step)
+    return qaic_debug.OpByOpVerifierMode(
+        ref_device=ref_device,
+        ref_dtype=ref_dtype,
+        atol=atol,
+        rtol=rtol,
+        use_ref_output_on_mismatch=use_ref_output_on_mismatch,
+        filter_config=filter_config,
+        dump_root_dir=dump_dir,
+    )
+
+
+def init_qaic_profiling(use_profiler: bool, device_type: str) -> None:
+    """Initialize the qaic profiling tool. Note: The profiler only works
+    for the qaic backend.
+
+    Args:
+        use_profiler (bool): Boolean flag to enable profiler.
+        device_type (str): Device on which the model is being executed.
+    """
+    if (use_profiler) and ("qaic" in device_type):
+        # Lazily import qaic_profile only when it is actually needed.
+        import torch_qaic.profile as qaic_profile
+
+        qaic_profile.start_profiling(device_type, 1)
+
+
+def stop_qaic_profiling(use_profiler: bool, device_type: str) -> None:
+    """Stop the qaic profiling tool. Note: The profiler only works
+    for the qaic backend.
+
+    Args:
+        use_profiler (bool): Boolean flag to enable profiler.
+        device_type (str): Device on which the model is being executed.
+ """ + if (use_profiler) and ("qaic" in device_type): + # Lazily imported qaic's qaic_profile when it is actually needed. + import torch_qaic.profile as qaic_profile + + qaic_profile.stop_profiling(device_type) diff --git a/QEfficient/finetune/experimental/tests/test_callback.py b/QEfficient/finetune/experimental/tests/test_callback.py new file mode 100644 index 000000000..6abdb87df --- /dev/null +++ b/QEfficient/finetune/experimental/tests/test_callback.py @@ -0,0 +1,79 @@ +# ----------------------------------------------------------------------------- +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: BSD-3-Clause +# +# ----------------------------------------------------------------------------- + +import pytest +from transformers import TrainerCallback + +from QEfficient.finetune.experimental.core.callbacks import create_callbacks +from QEfficient.finetune.experimental.core.component_registry import registry + + +class ModelSummaryCallback(TrainerCallback): + def __init__(self, max_depth=1): + self.max_depth = max_depth + + def on_train_begin(self, args, state, control, **kwargs): + model = kwargs.get("model") + if model is not None: + print("\n=== Model Summary ===") + print(model.__class__.__name__) + # Print layers up to max_depth + depth = 0 + for name, module in model.named_children(): + print(f" {name}: {module.__class__.__name__}") + depth += 1 + if depth >= self.max_depth: + break + print("======================\n") + + +# Setup test data +CALLBACK_CONFIGS = { + "early_stopping": { + "name": "early_stopping", + "early_stopping_patience": 3, + "early_stopping_threshold": 0.001, + }, + "tensorboard": {"name": "tensorboard", "tb_writer": "SummaryWriter"}, + "model_summary": { + "name": "model_summary", + "max_depth": 1, + }, +} + +REGISTRY_CALLBACK_CONFIGS = { + "model_summary": { + "name": "model_summary", + "max_depth": 1, + "callback_class": ModelSummaryCallback, + }, +} + + +@pytest.mark.parametrize("callback_name", CALLBACK_CONFIGS.keys()) +def test_callbacks(callback_name): + """Test that registered callbacks that can be created with their configs.""" + # Create callbacks using the factory + config = CALLBACK_CONFIGS[callback_name] + try: + callback_inst = create_callbacks(**config) + except ValueError as e: + assert "Unknown callback" in str(e) + return + if hasattr(callback_inst, "callback"): + assert callback_inst.callback is not None + else: + assert callback_inst is not None + + +@pytest.mark.parametrize("callback_name,callback_class", REGISTRY_CALLBACK_CONFIGS.items()) +def test_callbacks_registery(callback_name, callback_class): + """Test that a callback registered correctly.""" + registry.callback(callback_name)(callback_class) + callback = registry.get_callback(callback_name) + assert callback is not None + assert callback == callback_class From 000da81e3575c78186d03033865050cf77fc8b61 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Fri, 5 Dec 2025 09:05:57 +0000 Subject: [PATCH 06/14] [QEff_Finetuning] Adding callback and its test cases. 
Signed-off-by: Tanisha Chawada
---
 .../finetune/experimental/core/callbacks.py   | 21 +++++++++++++++++--
 .../experimental/tests/test_callback.py       | 19 +++--------------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/QEfficient/finetune/experimental/core/callbacks.py b/QEfficient/finetune/experimental/core/callbacks.py
index 3267fb2c7..30659e3bb 100644
--- a/QEfficient/finetune/experimental/core/callbacks.py
+++ b/QEfficient/finetune/experimental/core/callbacks.py
@@ -51,11 +51,16 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

     def on_train_begin(self, args, state, control, **kwargs):
+        """Set progress bar description at the start of training."""
         super().on_train_begin(args, state, control, **kwargs)
         if self.training_bar is not None:
             self.training_bar.set_description("Training Progress")

     def on_log(self, args, state, control, logs=None, **kwargs):
+        """
+        Override the default `on_log` behavior during training to display
+        the current epoch number, loss, and learning rate in the logs.
+        """
         if state.is_world_process_zero and self.training_bar is not None:
             # make a shallow copy of logs so we can mutate the fields copied
             # but avoid doing any value pickling.
@@ -113,10 +118,11 @@ def on_log(
         logs: Optional[Dict] = None,
         **kwargs,
     ):
+        """Append sanitized log metrics (including global_step) to a JSONL file."""
         if logs is None:
             return
-        logs.pop("entropy")
-        logs.pop("mean_token_accuracy")
+        logs.pop("entropy", None)
+        logs.pop("mean_token_accuracy", None)
         if state.global_step:
             logs["global_step"] = state.global_step
         if logs is not None:
@@ -127,7 +133,13 @@ def on_log(

 @registry.callback("qaic_profiler_callback")
 class QAICProfilerCallback(TrainerCallback):
+    """Callback to profile QAIC devices over a specified training step range."""
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize QAIC profiler settings (start/end steps and target device IDs).
+        """
+
         self.start_step = kwargs.get("start_step", -1)
         self.end_step = kwargs.get("end_step", -1)
         self.device_ids = kwargs.get("device_ids", [0])
@@ -147,7 +159,12 @@ def on_step_begin(self, args: TrainingArguments, state: TrainerState, control: T

 @registry.callback("qaic_op_by_op_verifier_callback")
 class QAICOpByOpVerifierCallback(TrainerCallback):
+    """Callback to verify QAIC operations step-by-step during a specified training range."""
+
     def __init__(self, *args, **kwargs):
+        """
+ """ self.start_step = kwargs.get("start_step", -1) self.end_step = kwargs.get("end_step", -1) self.trace_dir = kwargs.get("trace_dir", "qaic_op_by_op_traces") diff --git a/QEfficient/finetune/experimental/tests/test_callback.py b/QEfficient/finetune/experimental/tests/test_callback.py index 6abdb87df..18ec3978d 100644 --- a/QEfficient/finetune/experimental/tests/test_callback.py +++ b/QEfficient/finetune/experimental/tests/test_callback.py @@ -13,22 +13,8 @@ class ModelSummaryCallback(TrainerCallback): - def __init__(self, max_depth=1): - self.max_depth = max_depth - - def on_train_begin(self, args, state, control, **kwargs): - model = kwargs.get("model") - if model is not None: - print("\n=== Model Summary ===") - print(model.__class__.__name__) - # Print layers up to max_depth - depth = 0 - for name, module in model.named_children(): - print(f" {name}: {module.__class__.__name__}") - depth += 1 - if depth >= self.max_depth: - break - print("======================\n") + def __init__(self): + pass # Setup test data @@ -68,6 +54,7 @@ def test_callbacks(callback_name): assert callback_inst.callback is not None else: assert callback_inst is not None + assert isinstance(callback_inst, TrainerCallback) @pytest.mark.parametrize("callback_name,callback_class", REGISTRY_CALLBACK_CONFIGS.items()) From 94eab127d635ebef3c88260138c17203f55f0372 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Tue, 9 Dec 2025 06:08:14 +0000 Subject: [PATCH 07/14] [QEff.finetuning] Adding config_manager and its test cases. Signed-off-by: Tanisha Chawada --- .../experimental/core/config_manager.py | 648 ++++++++++++++++++ .../experimental/tests/test_config.yaml | 117 ++++ .../experimental/tests/test_config_manager.py | 50 ++ 3 files changed, 815 insertions(+) create mode 100644 QEfficient/finetune/experimental/tests/test_config.yaml create mode 100644 QEfficient/finetune/experimental/tests/test_config_manager.py diff --git a/QEfficient/finetune/experimental/core/config_manager.py b/QEfficient/finetune/experimental/core/config_manager.py index d647b73a6..60ed4d4b6 100644 --- a/QEfficient/finetune/experimental/core/config_manager.py +++ b/QEfficient/finetune/experimental/core/config_manager.py @@ -4,3 +4,651 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- +""" +Configuration manager for handling all training configurations. +Provides centralized configuration loading, validation, and management. 
+""" + +import json +import os +import sys +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import yaml +from transformers.hf_argparser import HfArgumentParser + +from QEfficient.finetune.experimental.core.component_registry import registry + + +@dataclass +class OptimizerConfig: + """Configuration for optimizers.""" + + optimizer_name: str = field( + default="adamw", + metadata={"help": "The name of the optimizer to use."}, + ) + lr: float = field( + default=5e-5, + metadata={"help": "The initial learning rate for the optimizer."}, + ) + weight_decay: float = field( + default=0.01, + metadata={"help": "The weight decay to apply (if any)."}, + ) + + +@dataclass +class SchedulerConfig: + """Configuration for learning rate schedulers.""" + + scheduler_name: str = field( + default="cosine", + metadata={"help": "The name of the scheduler to use (e.g., 'linear', 'cosine')."}, + ) + warmup_steps: int = field( + default=100, + metadata={ + "help": "Number of steps for the warmup phase. If provided " + "value is within [0-1) range then it will be interpreted as " + "ratio of total training steps for the warmup phase." + }, + ) + + +@dataclass +class DatasetConfig: + """Configuration for datasets.""" + + tokenizer_name: str = field( + default="HuggingFaceTB/SmolLM-135M", + metadata={"help": "The name or path of the tokenizer to use."}, + ) + dataset_type: str = field( + default="seq_completion", + metadata={"help": "The type of dataset (e.g., 'seq_completion')."}, + ) + dataset_name: str = field( + default="knkarthick/samsum", + metadata={"help": "The name or path of the dataset."}, + ) + dataset_subset: str = field( + default="default", + metadata={"help": "The subset of the dataset to use, if applicable."}, + ) + train_split: str = field( + default="train", + metadata={"help": "The name of the training split."}, + ) + test_split: str = field( + default="test", + metadata={"help": "The name of the test/validation split."}, + ) + max_seq_length: int = field( + default=512, + metadata={"help": "The maximum sequence length for tokenization."}, + ) + split_ratio: float = field( + default=0.8, + metadata={"help": "Ratio for train/test split, used when only train_split is provided."}, + ) + input_columns: list[str] = field( + default_factory=lambda: ["text"], + metadata={"help": "List of column names containing input text."}, + ) + target_column: Optional[str] = field( + default=None, + metadata={"help": "Name of the column containing target labels (if applicable)."}, + ) + train_batch_size: int = field( + default=1, + metadata={"help": "Batch size per device during training."}, + ) + eval_batch_size: int = field( + default=1, + metadata={"help": "Batch size per device during evaluation."}, + ) + num_workers: int = field( + default=4, + metadata={"help": "Number of workers for dataset processing."}, + ) + collate_fn: str = field( + default="dynamic_padding", + metadata={"help": "The collation function to use (e.g., 'dynamic_padding')."}, + ) + group_by_length: bool = field( + default=True, + metadata={"help": "Whether to group samples by length to minimize padding."}, + ) + length_column_name: str = field( + default="input_ids", + metadata={"help": "The column name containing the length of the input sequences."}, + ) + dataloader_pin_memory: bool = field( + default=True, + metadata={"help": "Whether to pin GPU memory for dataloaders."}, + ) + dataloader_persistent_workers: bool = field( + default=True, + metadata={"help": 
"Whether to keep dataloader workers alive across epochs."}, + ) + dataloader_prefetch_factor: int = field( + default=1, + metadata={"help": "Number of samples loaded in advance by each worker."}, + ) + dataloader_drop_last: bool = field( + default=False, + metadata={"help": "Whether to drop the last incomplete batch."}, + ) + dataloader_num_workers: int = field( + default=1, + metadata={"help": "Number of workers for the DataLoader."}, + ) + + +@dataclass +class PeftConfig: + """Configuration for PEFT (Parameter-Efficient Fine-Tuning) methods.""" + + lora_r: int = field( + default=8, + metadata={"help": "Lora attention dimension."}, + ) + lora_alpha: int = field( + default=16, + metadata={"help": "Lora alpha."}, + ) + lora_dropout: float = field( + default=0.1, + metadata={"help": "The dropout probability for Lora layers."}, + ) + target_modules: list[str] = field( + default_factory=lambda: ["q_proj", "v_proj"], + metadata={"help": "The modules to apply Lora to."}, + ) + bias: str = field( + default="none", + metadata={"help": "Bias type for Lora ('none', 'all', 'lora_only')."}, + ) + task_type: str = field( + default="CAUSAL_LM", + metadata={"help": "The task type for PEFT (e.g., 'CAUSAL_LM', 'SEQ_2_SEQ_LM')."}, + ) + peft_type: str = field( + default="LORA", + metadata={"help": "The PEFT method to use (e.g., 'LORA', 'IA3')."}, + ) + + +@dataclass +class ModelConfig: + """Configuration for models.""" + + model_name: str = field( + default="HuggingFaceTB/SmolLM-135M", + metadata={"help": "The name or path of the pretrained model."}, + ) + model_type: str = field( + default="hf", + metadata={"help": "The type of model ('hf' for Hugging Face, 'custom' for custom models)."}, + ) + auto_class_name: str = field( + default="AutoModelForCausalLM", + metadata={"help": "The AutoClass name to load the model (e.g., 'AutoModelForCausalLM')."}, + ) + load_in_4bit: bool = field( + default=False, + metadata={"help": "Whether to load the model in 4-bit quantization."}, + ) + use_peft: bool = field( + default=True, + metadata={"help": "Whether to use PEFT (Parameter-Efficient Fine-Tuning)."}, + ) + peft_config: Optional[PeftConfig] = field( + default_factory=PeftConfig, + metadata={"help": "Configuration for PEFT."}, + ) + use_cache: bool = field( + default=False, + metadata={"help": "Whether to use the past key/values in the model for faster decoding."}, + ) + attn_implementation: str = field( + default="sdpa", + metadata={"help": "The attention implementation to use (e.g., 'sdpa', 'eager')."}, + ) + device_map: Optional[str] = field( + default=None, + metadata={"help": "The device map to use for model distribution (e.g., 'auto')."}, + ) + + +@dataclass +class CallbackConfig: + """Configuration for callbacks.""" + + callbacks: Dict[str, Dict[str, Any]] = field( + default_factory=dict, + metadata={"help": "Dictionary of callback configurations, keyed by callback name."}, + ) + + +@dataclass +class GradientCheckpointingKwargs: + """Arguments for gradient checkpointing.""" + + preserve_rng_state: bool = field( + default=True, + metadata={"help": "Whether to preserve the RNG state when checkpointing."}, + ) + use_reenrant: bool = field( + default=False, + metadata={"help": "Whether to use reentrant gradient checkpointing."}, + ) + + +@dataclass +class DdpConfig: + """Arguments for Distributed Data Parallel (DDP) training.""" + + ddp_backend: str = field( + default="qccl", + metadata={"help": "The DDP backend to use (e.g., 'nccl', 'gloo', 'qccl')."}, + ) + ddp_find_unused_parameters: bool = field( + 
default=True, + metadata={"help": "Whether to find unused parameters in DDP."}, + ) + ddp_bucket_cap_mb: Optional[int] = field( + default=25, + metadata={"help": "The bucket size in MB for DDP communication."}, + ) + ddp_broadcast_buffers: bool = field( + default=True, + metadata={"help": "Whether to broadcast buffers in DDP."}, + ) + ddp_timeout: int = field( + default=1800, + metadata={"help": "Timeout for DDP operations in seconds."}, + ) + + +@dataclass +class TrainingConfig: + """Configuration for training.""" + + type: str = field( + default="sft", + metadata={"help": "The type of training (e.g., 'sft' for Supervised Fine-Tuning)."}, + ) + output_dir: str = field( + default="./training_results", + metadata={"help": "The output directory where the model predictions and checkpoints will be written."}, + ) + overwrite_output_dir: bool = field( + default=False, + metadata={"help": "Whether to overwrite the output directory."}, + ) + seed: int = field( + default=42, + metadata={"help": "Random seed for reproducibility."}, + ) + + do_eval: bool = field( + default=True, + metadata={"help": "Whether to run evaluation during training."}, + ) + eval_strategy: str = field( + default="epoch", + metadata={"help": "The evaluation strategy to use ('no', 'steps', 'epoch')."}, + ) + eval_steps: int = field( + default=100, + metadata={"help": "Number of update steps between two evaluations."}, + ) + + per_device_train_batch_size: int = field( + default=1, + metadata={"help": "Batch size per device during training."}, + ) + per_device_eval_batch_size: int = field( + default=1, + metadata={"help": "Batch size per device during evaluation."}, + ) + gradient_accumulation_steps: int = field( + default=1, + metadata={"help": "Number of updates steps to accumulate before performing a backward/update pass."}, + ) + num_train_epochs: int = field( + default=1, + metadata={"help": "Total number of training epochs to perform."}, + ) + max_steps: int = field( + default=-1, + metadata={"help": "If > 0: set total number of training steps to perform."}, + ) + + log_level: str = field( + default="info", + metadata={"help": "Set the verbosity level of the logs ('debug', 'info', 'warning', 'error')."}, + ) + log_on_each_node: bool = field( + default=True, + metadata={"help": "Whether to log on each node in a distributed setup."}, + ) + logging_strategy: str = field( + default="steps", + metadata={"help": "The logging strategy to use ('no', 'steps', 'epoch')."}, + ) + logging_steps: int = field( + default=10, + metadata={"help": "Number of update steps between two loggings."}, + ) + + save_strategy: str = field( + default="epoch", + metadata={"help": "The checkpoint save strategy to use ('no', 'steps', 'epoch')."}, + ) + save_steps: int = field( + default=100, + metadata={"help": "Number of update steps between two checkpoints (if save_strategy is 'steps')."}, + ) + save_total_limit: int = field( + default=5, + metadata={"help": "Limit the total amount of checkpoints. 
Deletes older checkpoints to stay within limit."}, + ) + metric_for_best_model: str = field( + default="eval_loss", + metadata={"help": "The metric to use to compare two models ('eval_loss', etc.)."}, + ) + + dtype: str = field( + default="fp16", + metadata={"help": "The data type to use for training (e.g., 'fp16', 'bf16')."}, + ) + + gradient_checkpointing: bool = field( + default=False, + metadata={"help": "Whether to use gradient checkpointing."}, + ) + gradient_checkpointing_kwargs: Optional[GradientCheckpointingKwargs] = field( + default_factory=GradientCheckpointingKwargs, + metadata={"help": "Arguments for gradient checkpointing."}, + ) + + torch_compile: bool = field( + default=True, + metadata={"help": "Whether to compile the model with `torch.compile`."}, + ) + include_tokens_per_second: bool = field( + default=True, + metadata={"help": "Whether to include tokens per second in logs."}, + ) + include_num_input_tokens_seen: bool = field( + default=True, + metadata={"help": "Whether to include the number of input tokens seen in logs."}, + ) + average_tokens_across_devices: bool = field( + default=True, + metadata={"help": "Whether to average tokens across devices in distributed training."}, + ) + + disable_tqdm: Optional[bool] = field( + default=None, + metadata={"help": "Whether to disable the tqdm progress bar."}, + ) + fsdp_config: Optional[Dict[str, Any]] = field( + default=None, + metadata={"help": "FSDP configuration dictionary."}, + ) + deepspeed_config: Optional[Dict[str, Any]] = field( + default=None, + metadata={"help": "DeepSpeed configuration dictionary."}, + ) + accelerator_config: Optional[Dict[str, Any]] = field( + default=None, + metadata={"help": "Accelerate configuration dictionary."}, + ) + ddp_config: Optional[DdpConfig] = field( + default_factory=DdpConfig, + metadata={"help": "DDP configuration dictionary."}, + ) + use_cpu: Optional[bool] = field( + default=None, + metadata={"help": "Whether to explicitly run training on CPU."}, + ) + resume_from_checkpoint: Optional[str] = field( + default=None, + metadata={"help": "Path to a checkpoint to resume training from."}, + ) + restore_callback_states_from_checkpoint: Optional[bool] = field( + default=None, + metadata={"help": "Whether to restore callback states from checkpoint."}, + ) + + +@dataclass +class MasterConfig: + """Main training configuration.""" + + model: ModelConfig = field(default_factory=ModelConfig, metadata={"help": "Configuration for the model."}) + + dataset: DatasetConfig = field(default_factory=DatasetConfig, metadata={"help": "Configuration for the dataset."}) + + optimizers: OptimizerConfig = field( + default_factory=OptimizerConfig, metadata={"help": "Configuration for optimizers."} + ) + + scheduler: SchedulerConfig = field( + default_factory=SchedulerConfig, metadata={"help": "Configuration for the learning rate scheduler."} + ) + + callbacks: CallbackConfig = field(default_factory=CallbackConfig, metadata={"help": "Configuration for callbacks."}) + + training: TrainingConfig = field( + default_factory=TrainingConfig, metadata={"help": "Configuration for training parameters."} + ) + + extra_params: Dict[str, Any] = field( + default_factory=dict, metadata={"help": "Additional top-level parameters not explicitly defined."} + ) + + +def parse_arguments(config_path: Optional[str] = None) -> MasterConfig: + """Create argument parser for the new finetuning interface.""" + parser = HfArgumentParser(MasterConfig) + + if config_path: + config_path = os.path.abspath(config_path) + if not 
os.path.exists(config_path): + raise FileNotFoundError(f"Config file not found: {config_path}") + if not (config_path.endswith(".yaml") or config_path.endswith(".yml")): + raise ValueError(f"Expected a .yaml/.yml file, got: {config_path}") + + try: + (master_config,) = parser.parse_yaml_file(yaml_file=config_path) + return master_config + except Exception as e: + raise ValueError(f"Failed to parse YAML config '{config_path}': {e}") + + if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + master_config = parser.parse_yaml_file(yaml_file=os.path.abspath(sys.argv[1]))[0] + else: + master_config = parser.parse_args_into_dataclasses() + + return master_config + + +class ConfigManager: + """Manages configuration loading, validation, and updates.""" + + def __init__(self, config: MasterConfig): + """ + Initialize ConfigManager with either: + - Path to config file (str or Path) + - Configuration dictionary + - None (creates empty config) + """ + self.config = config + + def load_config(self, config_path: Union[str, Path]) -> None: + """Load configuration from file.""" + config_path = Path(config_path) + + if not config_path.exists(): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + if config_path.suffix.lower() in [".yaml", ".yml"]: + with open(config_path, "r") as f: + config_dict = yaml.safe_load(f) + elif config_path.suffix.lower() == ".json": + with open(config_path, "r") as f: + config_dict = json.load(f) + else: + raise ValueError(f"Unsupported configuration file format: {config_path.suffix}") + + self.update_config(config_dict) + + def update_config(self, config_dict: Dict[str, Any]) -> None: + """Update configuration with dictionary values.""" + for key, value in config_dict.items(): + if hasattr(self.config, key): + if isinstance(value, dict) and hasattr(getattr(self.config, key), "__dataclass_fields__"): + # Special handling for callbacks + if key in ["callbacks", "optimizers", "loss_functions"]: + nested_config = getattr(self.config, key) + for component_name, component_dict in value.items(): + if isinstance(component_dict, dict): + getattr(nested_config, key)[component_name] = component_dict + else: + getattr(nested_config, "extra_params")[component_name] = nested_config.extra_params[ + component_name + ] = component_dict + else: + # Update nested dataclass + nested_config = getattr(self.config, key) + for nested_key, nested_value in value.items(): + if hasattr(nested_config, nested_key): + setattr(getattr(self.config, key), nested_key, nested_value) + elif hasattr(nested_config, "extra_params"): + getattr(getattr(self.config, key), "extra_params")[nested_key] = nested_value + else: + setattr(self.config, key, value) + else: + # Store unknown parameters in extra_params + self.config.extra_params[key] = value + + def save_config(self, output_path: Union[str, Path]) -> None: + """Save current configuration to file.""" + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + config_dict = self.config + + if output_path.suffix.lower() in [".yaml", ".yml"]: + with open(output_path, "w") as f: + yaml.dump(config_dict, f, default_flow_style=False, indent=2) + elif output_path.suffix.lower() == ".json": + with open(output_path, "w") as f: + json.dump(config_dict, f, indent=2) + else: + raise ValueError(f"Unsupported output file format: {output_path.suffix}") + + def validate_config(self) -> None: + 
"""Validate configuration parameters.""" + errors = [] + + # Validate model configuration + if not self.config.model.model_name: + errors.append("Model name is required") + + # Validate dataset configuration + if not self.config.dataset.dataset_name: + errors.append("Dataset name is required") + + # Validate training parameters + if self.config.dataset.train_batch_size <= 0: + errors.append("Train batch size must be positive") + + if self.config.dataset.eval_batch_size <= 0: + errors.append("Validation batch size must be positive") + + if self.config.training.num_train_epochs <= 0: + errors.append("Number of epochs must be positive") + + if self.config.training.gradient_accumulation_steps <= 0: + errors.append("Gradient accumulation steps must be positive") + + # Validate device configuration + valid_devices = ["cpu", "cuda", "qaic"] + if self.config.training.device not in valid_devices: + errors.append(f"Device must be one of {valid_devices}") + + if errors: + raise ValueError("Configuration validation failed:\n" + "\n".join(f"- {error}" for error in errors)) + + def get_callback_config(self) -> Dict[str, Any]: + """Get callback configuration as dictionary.""" + return self.config.callbacks + + def get_optimizer_config(self) -> Dict[str, Any]: + """Get optimizer configuration as dictionary.""" + return self.config.optimizers + + def get_training_config(self) -> Dict[str, Any]: + """Get training configuration as dictionary.""" + return self.config.training + + def get_scheduler_config(self) -> Dict[str, Any]: + """Get scheduler configuration as dictionary.""" + return self.config.scheduler + + def get_dataset_config(self) -> Dict[str, Any]: + """Get dataset configuration as dictionary.""" + return self.config.dataset + + def get_model_config(self) -> Dict[str, Any]: + """Get model configuration as dictionary.""" + return self.config.model + + def to_dict(self) -> Dict[str, Any]: + """Convert configuration to dictionary.""" + return asdict(self.config) + + def __getattr__(self, name: str) -> Any: + """Allow direct access to config attributes.""" + if hasattr(self.config, name): + return getattr(self.config, name) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + + +def create_trainer_config(name: str, **dependencies) -> tuple: + """ + Create trainer configuration based on registered trainer modules. 
+ + Args: + name: Name of the trainer type + **dependencies: Any dependencies needed to configure the trainer + + Returns: + tuple: (trainer_class, args_class, additional_kwargs) + """ + config = registry.get_trainer_module(name) + + # Process required kwargs based on available dependencies + additional_kwargs = {} + for kwarg, default in config["required_kwargs"].items(): + if kwarg in dependencies: + additional_kwargs[kwarg] = dependencies[kwarg] + elif default != "REQUIRED": + additional_kwargs[kwarg] = default + + # Check for missing required arguments + for kwarg, default in config["required_kwargs"].items(): + if kwarg not in additional_kwargs and default == "REQUIRED": + raise ValueError(f"Required argument '{kwarg}' not provided for trainer '{name}'") + + return config["trainer_cls"], config["args_cls"], additional_kwargs diff --git a/QEfficient/finetune/experimental/tests/test_config.yaml b/QEfficient/finetune/experimental/tests/test_config.yaml new file mode 100644 index 000000000..59d388bd3 --- /dev/null +++ b/QEfficient/finetune/experimental/tests/test_config.yaml @@ -0,0 +1,117 @@ +# ----------------------------------------------------------------------------- +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: BSD-3-Clause +# +# ----------------------------------------------------------------------------- + +# Model configuration +model: + model_type: "hf" # Hugging Face model + auto_class_name: "AutoModelForCausalLM" + model_name: "HuggingFaceTB/SmolLM-135M" # Pretrained model name + load_in_4bit: false + use_peft: true + peft_config: + lora_r: 8 + lora_alpha: 16 + lora_dropout: 0.1 + target_modules: ["q_proj", "v_proj"] + bias: "none" # Options: none, all, lora_only + task_type: "CAUSAL_LM" # Options: CAUSAL_LM, SEQ_2_SEQ_LM, etc. + peft_type: "LORA" # Options: LORA, IA3, etc. + +# Dataset configuration +dataset: + tokenizer_name: "HuggingFaceTB/SmolLM-135M" + dataset_type: "seq_completion" + # dataset_name: "Arthur-LAGACHERIE/very-smollm-corpus-0.5M" + dataset_name: "knkarthick/samsum" + train_split: "train" + max_seq_length: 512 + split_ratio: 0.8 # Ratio for train/test split, used when only train_split is provided + test_split: "test" + group_by_length: True + num_workers: 4 + pin_memory: True + persistent_workers: True + prefetch_factor: 1 + drop_last: False + +# Training configuration +training: + type: "sft" + output_dir: "./training_results" + overwrite_output_dir: False + seed: 42 + + do_eval: True + eval_strategy: "epoch" + eval_steps: 100 + + per_device_train_batch_size: 1 + per_device_eval_batch_size: 1 + gradient_accumulation_steps: 1 + num_train_epochs: 1 + max_steps: -1 + + log_level: "info" + log_on_each_node: True + logging_strategy: "steps" + logging_steps: 10 + + save_strategy: "epoch" + save_steps: 100 # If 'save_strategy' is 'steps' then it will be used. 
+  save_total_limit: 5
+  metric_for_best_model: "eval_loss"
+
+  dtype: "fp16"
+  completion_only_loss: True
+  report_to: "trackio"
+
+  ddp_config:
+    ddp_backend: "qccl"
+    ddp_find_unused_parameters: False
+    ddp_bucket_cap_mb: 25
+    ddp_broadcast_buffers: null
+    ddp_timeout: 1800
+
+  # Uncomment below to explicitly run on CPU
+  use_cpu: False
+
+  gradient_checkpointing: False
+  gradient_checkpointing_kwargs:
+    preserve_rng_state: True
+    use_reentrant: False
+
+  torch_compile: True
+  include_tokens_per_second: True
+  include_num_input_tokens_seen: True
+  average_tokens_across_devices: True
+
+# Optimizer configuration
+optimizers:
+  optimizer_name: "adamw"
+  lr: 5e-5
+  weight_decay: 0.01
+
+
+# “linear” → transformers.get_linear_schedule_with_warmup
+# “cosine” → transformers.get_cosine_schedule_with_warmup
+# “cosine_with_restarts” → transformers.get_cosine_with_hard_restarts_schedule_with_warmup
+# “polynomial” → transformers.get_polynomial_decay_schedule_with_warmup
+# “constant” → transformers.get_constant_schedule
+# “constant_with_warmup” → transformers.get_constant_schedule_with_warmup
+# “inverse_sqrt” → transformers.get_inverse_sqrt_schedule
+
+scheduler:
+  scheduler_name: "cosine"
+  warmup_steps: 100 # warmup_steps or warmup_ratio
+  warmup_ratio: 0.1
+
+callbacks:
+  early_stopping:
+    early_stopping_patience: 3
+    early_stopping_threshold: 0.001
+  tensorboard:
+
diff --git a/QEfficient/finetune/experimental/tests/test_config_manager.py b/QEfficient/finetune/experimental/tests/test_config_manager.py
new file mode 100644
index 000000000..10105a33e
--- /dev/null
+++ b/QEfficient/finetune/experimental/tests/test_config_manager.py
@@ -0,0 +1,50 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+
+from pathlib import Path
+
+import pytest
+
+from QEfficient.finetune.experimental.core.config_manager import ConfigManager, parse_arguments
+
+
+@pytest.fixture
+def config_path() -> Path:
+    here = Path(__file__).resolve().parent
+    return (here / "test_config.yaml").resolve()
+
+
+# git commit -s -m "[QEff.finetuning] Adding config_manager and its test cases."
+
+
+def test_config(config_path):
+    # parse the yaml file
+    master_config = parse_arguments(config_path)
+    config_manager = ConfigManager(master_config)
+    # Test that the config manager is initialized correctly
+    assert isinstance(config_manager, ConfigManager)
+
+    # Test that all required fields are present
+    missing = [
+        a
+        for a in ("model", "dataset", "optimizers", "scheduler", "callbacks", "training")
+        if not hasattr(config_manager, a)
+    ]
+    assert not missing, f"Missing attributes: {missing}"
+    trainer_config = config_manager.get_training_config()
+    assert all(attr in trainer_config for attr in ("output_dir", "per_device_train_batch_size", "num_train_epochs"))
+    dataset_config = config_manager.get_dataset_config()
+    assert all(attr in dataset_config for attr in ("dataset_type", "dataset_name", "tokenizer_name"))
+    model_config = config_manager.get_model_config()
+    assert all(attr in model_config for attr in ("model_type", "model_name", "use_peft"))
+    scheduler_config = config_manager.get_scheduler_config()
+    assert all(attr in scheduler_config for attr in ("scheduler_name",))
+    callback_config = config_manager.get_callback_config()
+    assert all(attr in callback_config for attr in ("early_stopping",))
+    optimizer_config = config_manager.get_optimizer_config()
+    assert all(attr in optimizer_config for attr in ("optimizer_name", "lr"))

From 28ec40b624ff80bd2d3041683caa44835989e189 Mon Sep 17 00:00:00 2001
From: Tanisha Chawada
Date: Tue, 9 Dec 2025 07:31:48 +0000
Subject: [PATCH 08/14] [QEff.finetuning] Adding config_manager and its test cases.

Signed-off-by: Tanisha Chawada
---
 QEfficient/finetune/experimental/tests/test_config_manager.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/QEfficient/finetune/experimental/tests/test_config_manager.py b/QEfficient/finetune/experimental/tests/test_config_manager.py
index 10105a33e..b3b9b0b24 100644
--- a/QEfficient/finetune/experimental/tests/test_config_manager.py
+++ b/QEfficient/finetune/experimental/tests/test_config_manager.py
@@ -19,9 +19,6 @@ def config_path() -> Path:
     return (here / "test_config.yaml").resolve()
 
 
-# git commit -s -m "[QEff.finetuning] Adding config_manager and its test cases."
-
-
 def test_config(config_path):
     # parse the yaml file
     master_config = parse_arguments(config_path)

From 1f0a4df2ba4f0204847ec4b72ee3a486bb352c57 Mon Sep 17 00:00:00 2001
From: Tanisha Chawada
Date: Thu, 11 Dec 2025 07:26:49 +0000
Subject: [PATCH 09/14] [QEff.finetuning] Adding config_manager and its test_cases.
Signed-off-by: Tanisha Chawada --- .../experimental/core/config_manager.py | 233 +++++++++++++----- .../experimental/core/utils/profiler_utils.py | 88 ------- .../experimental/tests/test_config.yaml | 33 +-- .../experimental/tests/test_config_manager.py | 25 +- 4 files changed, 196 insertions(+), 183 deletions(-) diff --git a/QEfficient/finetune/experimental/core/config_manager.py b/QEfficient/finetune/experimental/core/config_manager.py index 60ed4d4b6..b28c2e1e3 100644 --- a/QEfficient/finetune/experimental/core/config_manager.py +++ b/QEfficient/finetune/experimental/core/config_manager.py @@ -11,10 +11,9 @@ import json import os -import sys -from dataclasses import asdict, dataclass, field +from dataclasses import asdict, dataclass, field, fields, is_dataclass from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union import yaml from transformers.hf_argparser import HfArgumentParser @@ -257,7 +256,7 @@ class DdpConfig: metadata={"help": "The DDP backend to use (e.g., 'nccl', 'gloo', 'qccl')."}, ) ddp_find_unused_parameters: bool = field( - default=True, + default=False, metadata={"help": "Whether to find unused parameters in DDP."}, ) ddp_bucket_cap_mb: Optional[int] = field( @@ -294,7 +293,10 @@ class TrainingConfig: default=42, metadata={"help": "Random seed for reproducibility."}, ) - + device: str = field( + default="qaic", + metadata={"help": "The device to use for training ('cuda', 'cpu', etc.)."}, + ) do_eval: bool = field( default=True, metadata={"help": "Whether to run evaluation during training."}, @@ -307,7 +309,6 @@ class TrainingConfig: default=100, metadata={"help": "Number of update steps between two evaluations."}, ) - per_device_train_batch_size: int = field( default=1, metadata={"help": "Batch size per device during training."}, @@ -381,10 +382,6 @@ class TrainingConfig: default=True, metadata={"help": "Whether to compile the model with `torch.compile`."}, ) - include_tokens_per_second: bool = field( - default=True, - metadata={"help": "Whether to include tokens per second in logs."}, - ) include_num_input_tokens_seen: bool = field( default=True, metadata={"help": "Whether to include the number of input tokens seen in logs."}, @@ -426,6 +423,14 @@ class TrainingConfig: default=None, metadata={"help": "Whether to restore callback states from checkpoint."}, ) + report_to: Optional[List[str]] = field( + default=None, + metadata={"help": "The list of integrations to report the results and logs to."}, + ) + completion_only_loss: Optional[bool] = field( + default=False, + metadata={"help": "Whether to compute loss only on completion tokens."}, + ) @dataclass @@ -455,7 +460,7 @@ class MasterConfig: ) -def parse_arguments(config_path: Optional[str] = None) -> MasterConfig: +def parse_arguments(config_path: Optional[str] = None, args: Optional[List[str]] = None) -> MasterConfig: """Create argument parser for the new finetuning interface.""" parser = HfArgumentParser(MasterConfig) @@ -472,12 +477,15 @@ def parse_arguments(config_path: Optional[str] = None) -> MasterConfig: except Exception as e: raise ValueError(f"Failed to parse YAML config '{config_path}': {e}") - if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- master_config = parser.parse_yaml_file(yaml_file=os.path.abspath(sys.argv[1]))[0] + args = [] if args is None else args + # If a single positional YAML file was passed via args, parse it as YAML + if len(args) == 1 and (args[0].endswith(".yaml") or args[0].endswith(".yml")): + yaml_path = os.path.abspath(args[0]) + (master_config,) = parser.parse_yaml_file(yaml_file=yaml_path) else: - master_config = parser.parse_args_into_dataclasses() + (master_config,) = parser.parse_args_into_dataclasses(args=args) + master_config = asdict(master_config) + master_config = MasterConfig(**master_config) return master_config @@ -512,34 +520,58 @@ def load_config(self, config_path: Union[str, Path]) -> None: self.update_config(config_dict) + def _ensure_extra_params(self, obj) -> Dict[str, Any]: + """Ensure obj.extra_params exists and is a dict; return it.""" + ep = getattr(obj, "extra_params", None) + if ep is None: + setattr(obj, "extra_params", {}) + ep = obj.extra_params + if not isinstance(ep, dict): + raise TypeError("extra_params must be a dict.") + return ep + + def _stash_top_level_extra(self, section: str, nested_key: str, value: Any) -> None: + """Store unknown nested values under MasterConfig.extra_params['section.nested_key'].""" + ep = self._ensure_extra_params(self.config) + ep[f"{section}.{nested_key}"] = value + def update_config(self, config_dict: Dict[str, Any]) -> None: """Update configuration with dictionary values.""" + + SPECIAL_KEYS = {"callbacks"} + for key, value in config_dict.items(): if hasattr(self.config, key): - if isinstance(value, dict) and hasattr(getattr(self.config, key), "__dataclass_fields__"): - # Special handling for callbacks - if key in ["callbacks", "optimizers", "loss_functions"]: - nested_config = getattr(self.config, key) - for component_name, component_dict in value.items(): - if isinstance(component_dict, dict): - getattr(nested_config, key)[component_name] = component_dict - else: - getattr(nested_config, "extra_params")[component_name] = nested_config.extra_params[ - component_name - ] = component_dict + target = getattr(self.config, key) + + # Special handling for callbacks (dict inside CallbackConfig) + if key in SPECIAL_KEYS and isinstance(value, dict): + if is_dataclass(target) and hasattr(target, "callbacks") and isinstance(target.callbacks, dict): + for component_name, component_cfg in value.items(): + target.callbacks[component_name] = component_cfg + elif isinstance(target, dict): + target.update(value) else: - # Update nested dataclass - nested_config = getattr(self.config, key) - for nested_key, nested_value in value.items(): - if hasattr(nested_config, nested_key): - setattr(getattr(self.config, key), nested_key, nested_value) - elif hasattr(nested_config, "extra_params"): - getattr(getattr(self.config, key), "extra_params")[nested_key] = nested_value - else: - setattr(self.config, key, value) + self._stash_top_level_extra(key, "__all__", value) + continue + + if isinstance(value, dict) and is_dataclass(target): + known = {f.name for f in fields(target)} + for nested_key, nested_value in value.items(): + if nested_key in known: + setattr(target, nested_key, nested_value) + else: + self._stash_top_level_extra(key, nested_key, nested_value) + continue + + if isinstance(value, dict) and isinstance(target, dict): + target.update(value) + continue + setattr(self.config, key, value) + else: - # Store unknown parameters in extra_params - self.config.extra_params[key] = value + ep = self._ensure_extra_params(self.config) + ep[key] = value def 
save_config(self, output_path: Union[str, Path]) -> None: """Save current configuration to file.""" @@ -557,38 +589,105 @@ def save_config(self, output_path: Union[str, Path]) -> None: else: raise ValueError(f"Unsupported output file format: {output_path.suffix}") - def validate_config(self) -> None: - """Validate configuration parameters.""" - errors = [] - - # Validate model configuration - if not self.config.model.model_name: - errors.append("Model name is required") - - # Validate dataset configuration - if not self.config.dataset.dataset_name: - errors.append("Dataset name is required") - - # Validate training parameters - if self.config.dataset.train_batch_size <= 0: - errors.append("Train batch size must be positive") - - if self.config.dataset.eval_batch_size <= 0: - errors.append("Validation batch size must be positive") + def _push(self, errs: List[str], cond: bool, msg: str) -> None: + """Append msg to errs if cond is True.""" + if cond: + errs.append(msg) - if self.config.training.num_train_epochs <= 0: - errors.append("Number of epochs must be positive") - - if self.config.training.gradient_accumulation_steps <= 0: - errors.append("Gradient accumulation steps must be positive") - - # Validate device configuration + def validate_config(self) -> None: + """ + Validate configuration parameters for MasterConfig. + """ + errors: List[str] = [] + + cfg = self.config + model = getattr(cfg, "model", {}) + dataset = getattr(cfg, "dataset", {}) + training = getattr(cfg, "training", {}) + + # ---------- Model ---------- + self._push(errors, not model.get("model_name"), "model.model_name is required.") + + # PEFT validation + if model.get("use_peft"): + pc = model.get("peft_config", {}) + self._push(errors, not isinstance(pc, dict), "model.peft_config must be a dict when use_peft=True.") + if isinstance(pc, dict): + self._push( + errors, + not isinstance(pc.get("lora_r", 0), int) or pc.get("lora_r", 0) <= 0, + "model.peft_config.lora_r must be a positive integer.", + ) + self._push( + errors, + not isinstance(pc.get("lora_alpha", 0), int) or pc.get("lora_alpha", 0) <= 0, + "model.peft_config.lora_alpha must be a positive integer.", + ) + self._push( + errors, + not (0.0 <= float(pc.get("lora_dropout", 0.0)) < 1.0), + "model.peft_config.lora_dropout must be in [0,1).", + ) + + # ---------- Dataset ---------- + self._push(errors, not dataset.get("dataset_name"), "dataset.dataset_name is required.") + self._push(errors, not dataset.get("tokenizer_name"), "dataset.tokenizer_name is required.") + self._push(errors, dataset.get("max_seq_length", 0) <= 0, "dataset.max_seq_length must be positive.") + + # ---------- Training ---------- + # Batch sizes + self._push( + errors, + training.get("per_device_train_batch_size", 0) <= 0, + "training.per_device_train_batch_size must be positive.", + ) + self._push( + errors, + training.get("per_device_eval_batch_size", 0) <= 0, + "training.per_device_eval_batch_size must be positive.", + ) + + # Epochs / steps + n_epochs = training.get("num_train_epochs", 0) + max_steps = training.get("max_steps", -1) + self._push( + errors, + n_epochs <= 0 and max_steps <= 0, + "Either training.num_train_epochs > 0 or training.max_steps > 0 must be set.", + ) + + # Gradient accumulation + self._push( + errors, + training.get("gradient_accumulation_steps", 0) <= 0, + "training.gradient_accumulation_steps must be positive.", + ) + + # Logging / saving configs + self._push(errors, training.get("logging_steps", 0) < 0, "training.logging_steps must be >= 0.") + 
self._push(errors, training.get("save_total_limit", 0) < 0, "training.save_total_limit must be >= 0.") + + # Device valid_devices = ["cpu", "cuda", "qaic"] - if self.config.training.device not in valid_devices: - errors.append(f"Device must be one of {valid_devices}") - + training_device = training.get("device", None) + if training_device not in valid_devices: + self._push(errors, training_device not in valid_devices, f"training.device must be one of {valid_devices}.") + + # DDP config + ddp = training.get("ddp_config", {}) + if isinstance(ddp, dict): + backend = ddp.get("ddp_backend") + # Accept qccl for Qualcomm, nccl for CUDA, gloo for CPU + self._push( + errors, + backend not in {"qccl", "nccl", "gloo", None}, + "training.ddp_config.ddp_backend must be one of {'qccl','nccl','gloo'} or omitted.", + ) + + # ---------- Final ---------- if errors: - raise ValueError("Configuration validation failed:\n" + "\n".join(f"- {error}" for error in errors)) + # Join messages with bullet points for readability + raise ValueError("Configuration validation failed:\n- " + "\n- ".join(errors)) def get_callback_config(self) -> Dict[str, Any]: """Get callback configuration as dictionary.""" diff --git a/QEfficient/finetune/experimental/core/utils/profiler_utils.py b/QEfficient/finetune/experimental/core/utils/profiler_utils.py index e24508e83..d647b73a6 100644 --- a/QEfficient/finetune/experimental/core/utils/profiler_utils.py +++ b/QEfficient/finetune/experimental/core/utils/profiler_utils.py @@ -4,91 +4,3 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- - - -from contextlib import nullcontext -from typing import ContextManager - -import torch - - -def get_op_verifier_ctx( - use_op_by_op_verifier: bool, - device_type: str, - dump_dir: str, - step: int, - ref_device: str = "cpu", - ref_dtype: torch.dtype = torch.float32, - atol: float = 1e-1, - rtol: float = 1e-5, - use_ref_output_on_mismatch: bool = True, -) -> ContextManager: - """Get the op-by-op verifier context manager when op-by-op verification is - enabled. It helps in debuging operator related issues by matching the - operator execution on qaic v/s cpu. This is meant only for qaic backend. - - Args: - use_op_by_op_verifier (bool): Boolean flag to enable op-by-op verifier. - device_type (str): Device on which the model is being executed. - dump_dir (str): Directory to dump the op-by-op verification results. - step (int): Step number for which the op-by-op verification is to be performed. - ref_device (str, optional): Device to use as reference for verification. - Defaults to "cpu". - ref_dtype (torch.dtype, optional): Data type to use as reference - datatype for verification. Defaults to torch.float32. - atol (float, optional): Absolute tolerance to match the results. Defaults to 1e-1. - rtol (float, optional): Relative tolerance to match the results. Defaults to 1e-5. - use_ref_output_on_mismatch (bool, optional): If an operator has a - mismatch with respect to the reference device, use the reference - device outputs and continue rest of the verification. Defaults to True. - - Returns: - ContextManager: Instance of context manager used to verify the operators. - """ - if (not use_op_by_op_verifier) or ("qaic" in device_type): - return nullcontext() - - # Lazily imported qaic_debug when it is actually needed. 
- import torch_qaic.debug as qaic_debug - - filter_config = qaic_debug.DispatchFilterConfig.default(device_type) - dump_dir = dump_dir + "/mismatches/step_" + str(step) - return qaic_debug.OpByOpVerifierMode( - ref_device=ref_device, - ref_dtype=ref_dtype, - atol=atol, - rtol=rtol, - use_ref_output_on_mismatch=use_ref_output_on_mismatch, - filter_config=filter_config, - dump_root_dir=dump_dir, - ) - - -def init_qaic_profiling(use_profiler: bool, device_type: str) -> None: - """Initialize the qaic profiling tool. Note: The profiler is only works - for qaic backend. - - Args: - use_profiler (bool): Boolean flag to enable profiler. - device_type (str): Device on which the model is being executed. - """ - if (use_profiler) and ("qaic" in device_type): - # Lazily imported qaic's qaic_profile when it is actually needed. - import torch_qaic.profile as qaic_profile - - qaic_profile.start_profiling(device_type, 1) - - -def stop_qaic_profiling(use_profiler: bool, device_type: str) -> None: - """Stop the qaic profiling tool. Note: The profiler is only works - for qaic backend. - - Args: - use_profiler (bool): Boolean flag to enable profiler. - device_type (str): Device on which the model is being executed. - """ - if (use_profiler) and ("qaic" in device_type): - # Lazily imported qaic's qaic_profile when it is actually needed. - import torch_qaic.profile as qaic_profile - - qaic_profile.stop_profiling(device_type) diff --git a/QEfficient/finetune/experimental/tests/test_config.yaml b/QEfficient/finetune/experimental/tests/test_config.yaml index 59d388bd3..e97e99d58 100644 --- a/QEfficient/finetune/experimental/tests/test_config.yaml +++ b/QEfficient/finetune/experimental/tests/test_config.yaml @@ -5,9 +5,9 @@ # # ----------------------------------------------------------------------------- -# Model configuration +# model configuration model: - model_type: "hf" # Hugging Face model + model_type: "hf" auto_class_name: "AutoModelForCausalLM" model_name: "HuggingFaceTB/SmolLM-135M" # Pretrained model name load_in_4bit: false @@ -17,9 +17,9 @@ model: lora_alpha: 16 lora_dropout: 0.1 target_modules: ["q_proj", "v_proj"] - bias: "none" # Options: none, all, lora_only - task_type: "CAUSAL_LM" # Options: CAUSAL_LM, SEQ_2_SEQ_LM, etc. - peft_type: "LORA" # Options: LORA, IA3, etc. + bias: "none" + task_type: "CAUSAL_LM" + peft_type: "LORA" # Dataset configuration dataset: @@ -33,10 +33,10 @@ dataset: test_split: "test" group_by_length: True num_workers: 4 - pin_memory: True - persistent_workers: True - prefetch_factor: 1 - drop_last: False + dataloader_pin_memory: True + dataloader_persistent_workers: True + dataloader_prefetch_factor: 1 + dataloader_drop_last: False # Training configuration training: @@ -44,7 +44,7 @@ training: output_dir: "./training_results" overwrite_output_dir: False seed: 42 - + device: "qaic" do_eval: True eval_strategy: "epoch" eval_steps: 100 @@ -61,7 +61,6 @@ training: logging_steps: 10 save_strategy: "epoch" - save_steps: 100 # If 'save_strategy' is 'steps' then it will be used. 
   save_total_limit: 5
   metric_for_best_model: "eval_loss"
 
@@ -76,7 +75,6 @@
     ddp_broadcast_buffers: null
     ddp_timeout: 1800
 
-  # Set to True to explicitly run on CPU
   use_cpu: False
 
   gradient_checkpointing: False
@@ -85,7 +83,6 @@
     use_reentrant: False
 
   torch_compile: True
-  include_tokens_per_second: True
   include_num_input_tokens_seen: True
   average_tokens_across_devices: True
 
@@ -95,19 +92,9 @@ optimizers:
   lr: 5e-5
   weight_decay: 0.01
 
-
-# “linear” → transformers.get_linear_schedule_with_warmup
-# “cosine” → transformers.get_cosine_schedule_with_warmup
-# “cosine_with_restarts” → transformers.get_cosine_with_hard_restarts_schedule_with_warmup
-# “polynomial” → transformers.get_polynomial_decay_schedule_with_warmup
-# “constant” → transformers.get_constant_schedule
-# “constant_with_warmup” → transformers.get_constant_schedule_with_warmup
-# “inverse_sqrt” → transformers.get_inverse_sqrt_schedule
-
 scheduler:
   scheduler_name: "cosine"
   warmup_steps: 100 # warmup_steps or warmup_ratio
-  warmup_ratio: 0.1
 
 callbacks:
   early_stopping:
diff --git a/QEfficient/finetune/experimental/tests/test_config_manager.py b/QEfficient/finetune/experimental/tests/test_config_manager.py
index b3b9b0b24..fd2abfd48 100644
--- a/QEfficient/finetune/experimental/tests/test_config_manager.py
+++ b/QEfficient/finetune/experimental/tests/test_config_manager.py
@@ -20,11 +20,14 @@ def config_path() -> Path:
 
 
 def test_config(config_path):
-    # parse the yaml file
-    master_config = parse_arguments(config_path)
+    master_config = parse_arguments(args=[])
     config_manager = ConfigManager(master_config)
-    # Test that the config manager is initialized correctly
     assert isinstance(config_manager, ConfigManager)
+    config_manager.load_config(config_path)
+    try:
+        config_manager.validate_config()
+    except Exception as e:
+        pytest.fail(f"Config validation failed with error: {e}")
 
     # Test that all required fields are present
     missing = [
@@ -34,14 +37,26 @@ def test_config(config_path):
     ]
     assert not missing, f"Missing attributes: {missing}"
     trainer_config = config_manager.get_training_config()
-    assert all(attr in trainer_config for attr in ("output_dir", "per_device_train_batch_size", "num_train_epochs"))
+    assert trainer_config is not None
+    assert isinstance(trainer_config, dict)
+    assert all(attr in trainer_config for attr in ("output_dir", "per_device_train_batch_size", "num_train_epochs", "ddp_config"))
     dataset_config = config_manager.get_dataset_config()
+    assert dataset_config is not None
+    assert isinstance(dataset_config, dict)
     assert all(attr in dataset_config for attr in ("dataset_type", "dataset_name", "tokenizer_name"))
     model_config = config_manager.get_model_config()
-    assert all(attr in model_config for attr in ("model_type", "model_name", "use_peft"))
+    assert model_config is not None
+    assert isinstance(model_config, dict)
+    assert all(attr in model_config for attr in ("model_type", "model_name", "use_peft", "peft_config"))
     scheduler_config = config_manager.get_scheduler_config()
+    assert scheduler_config is not None
+    assert isinstance(scheduler_config, dict)
     assert all(attr in scheduler_config for attr in ("scheduler_name",))
     callback_config = config_manager.get_callback_config()
+    assert callback_config is not None
+    assert isinstance(callback_config, dict)
     assert all(attr in callback_config for attr in ("early_stopping",))
     optimizer_config = config_manager.get_optimizer_config()
+    assert optimizer_config is not None
+    assert isinstance(optimizer_config, dict)
     assert all(attr in optimizer_config for attr in
("optimizer_name", "lr")) From 2cd53dbfc4c181949e5a0267f76072b44a24e04f Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Thu, 11 Dec 2025 13:01:15 +0530 Subject: [PATCH 10/14] Delete QEfficient/finetune/experimental/tests/test_optimizer.py Signed-off-by: Tanisha Chawada --- .../experimental/tests/test_optimizer.py | 93 ------------------- 1 file changed, 93 deletions(-) delete mode 100644 QEfficient/finetune/experimental/tests/test_optimizer.py diff --git a/QEfficient/finetune/experimental/tests/test_optimizer.py b/QEfficient/finetune/experimental/tests/test_optimizer.py deleted file mode 100644 index d9225f6de..000000000 --- a/QEfficient/finetune/experimental/tests/test_optimizer.py +++ /dev/null @@ -1,93 +0,0 @@ -# ----------------------------------------------------------------------------- -# -# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. -# SPDX-License-Identifier: BSD-3-Clause -# -# ----------------------------------------------------------------------------- - -import pytest -import torch.nn as nn -import torch.optim as optim - -from QEfficient.finetune.experimental.core.component_registry import registry -from QEfficient.finetune.experimental.core.optimizer import get_optimizer, get_optimizer_cls - -OPTIMIZER_CONFIGS = { - "Adam": { - "optimizer_name": "Adam", - "opt_cls": optim.Adam, - "lr": 1e-4, - "weight_decay": 0.01, - "betas": (0.9, 0.999), - "eps": 1e-8, - "amsgrad": False, - }, - "AdamW": { - "optimizer_name": "AdamW", - "opt_cls": optim.AdamW, - "lr": 1e-4, - "weight_decay": 0.01, - "betas": (0.9, 0.999), - "eps": 1e-8, - "amsgrad": False, - }, - "SGD": { - "optimizer_name": "SGD", - "opt_cls": optim.SGD, - "lr": 1e-4, - "momentum": 0.9, - "weight_decay": 0.01, - "dampening": 0.0, - "nesterov": False, - }, - "RMSprop": { - "optimizer_name": "RMSprop", - "opt_cls": optim.RMSprop, - }, -} - -REGISTRY_CONFIG = { - "RMSprop": { - "optimizer_name": "RMSprop", - "opt_cls": optim.RMSprop, - }, -} - - -@pytest.fixture -def dummy_model(): - return nn.Sequential( - nn.Linear(10, 5), - nn.ReLU(), - nn.Linear(5, 1), - ) - - -@pytest.mark.parametrize("opt_name", OPTIMIZER_CONFIGS.keys()) -def test_optimizers(opt_name, dummy_model): - """Test that all registered optimizers can be created with their configs.""" - config = OPTIMIZER_CONFIGS[opt_name] - config.pop("opt_cls") - try: - optimizer_class_and_kwargs = get_optimizer(config) - assert optimizer_class_and_kwargs is not None - except ValueError as e: - assert "Unknown optimizer" in str(e) - return - optimizer_class = optimizer_class_and_kwargs[0] - opt_inst = optimizer_class(dummy_model.parameters(), **optimizer_class_and_kwargs[1]) - assert isinstance(opt_inst, optim.Optimizer) - assert len(list(opt_inst.param_groups)) == 1 - - for key in ["lr", "weight_decay", "betas", "eps", "momentum", "dampening", "nesterov", "amsgrad"]: - if key in config: - assert opt_inst.param_groups[0][key] == config[key], f"{key} mismatch" - - -@pytest.mark.parametrize("opt_name, opt_cls", REGISTRY_CONFIG.items()) -def test_registered_optimizer(opt_name, opt_cls): - """Test that the optimizer registerd correctly.""" - registry.optimizer(opt_name)(opt_cls) - optimizer_class = get_optimizer_cls(opt_name) - assert optimizer_class is not None - assert optimizer_class == opt_cls From c53d7b370c1762ce32323de00d67e5667535c4e8 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Thu, 11 Dec 2025 13:01:39 +0530 Subject: [PATCH 11/14] Delete QEfficient/finetune/experimental/tests/test_callback.py Signed-off-by: Tanisha Chawada --- 
.../experimental/tests/test_callback.py | 66 ------------------- 1 file changed, 66 deletions(-) delete mode 100644 QEfficient/finetune/experimental/tests/test_callback.py diff --git a/QEfficient/finetune/experimental/tests/test_callback.py b/QEfficient/finetune/experimental/tests/test_callback.py deleted file mode 100644 index 18ec3978d..000000000 --- a/QEfficient/finetune/experimental/tests/test_callback.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# -# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. -# SPDX-License-Identifier: BSD-3-Clause -# -# ----------------------------------------------------------------------------- - -import pytest -from transformers import TrainerCallback - -from QEfficient.finetune.experimental.core.callbacks import create_callbacks -from QEfficient.finetune.experimental.core.component_registry import registry - - -class ModelSummaryCallback(TrainerCallback): - def __init__(self): - pass - - -# Setup test data -CALLBACK_CONFIGS = { - "early_stopping": { - "name": "early_stopping", - "early_stopping_patience": 3, - "early_stopping_threshold": 0.001, - }, - "tensorboard": {"name": "tensorboard", "tb_writer": "SummaryWriter"}, - "model_summary": { - "name": "model_summary", - "max_depth": 1, - }, -} - -REGISTRY_CALLBACK_CONFIGS = { - "model_summary": { - "name": "model_summary", - "max_depth": 1, - "callback_class": ModelSummaryCallback, - }, -} - - -@pytest.mark.parametrize("callback_name", CALLBACK_CONFIGS.keys()) -def test_callbacks(callback_name): - """Test that registered callbacks that can be created with their configs.""" - # Create callbacks using the factory - config = CALLBACK_CONFIGS[callback_name] - try: - callback_inst = create_callbacks(**config) - except ValueError as e: - assert "Unknown callback" in str(e) - return - if hasattr(callback_inst, "callback"): - assert callback_inst.callback is not None - else: - assert callback_inst is not None - assert isinstance(callback_inst, TrainerCallback) - - -@pytest.mark.parametrize("callback_name,callback_class", REGISTRY_CALLBACK_CONFIGS.items()) -def test_callbacks_registery(callback_name, callback_class): - """Test that a callback registered correctly.""" - registry.callback(callback_name)(callback_class) - callback = registry.get_callback(callback_name) - assert callback is not None - assert callback == callback_class From b75e8b78e5551835f10df9dd48c52a2363346b02 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Thu, 11 Dec 2025 13:02:18 +0530 Subject: [PATCH 12/14] Update optimizer.py Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/optimizer.py | 41 ------------------- 1 file changed, 41 deletions(-) diff --git a/QEfficient/finetune/experimental/core/optimizer.py b/QEfficient/finetune/experimental/core/optimizer.py index 2f77ce285..d647b73a6 100644 --- a/QEfficient/finetune/experimental/core/optimizer.py +++ b/QEfficient/finetune/experimental/core/optimizer.py @@ -4,44 +4,3 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- - -""" -Optimizer components for the training system. 
-""" - -from typing import Type - -import torch.optim as optim -from torch.optim import Optimizer - -from QEfficient.finetune.experimental.core.component_registry import registry - -registry.optimizer("Adam")(optim.Adam) -registry.optimizer("AdamW")(optim.AdamW) -registry.optimizer("SGD")(optim.SGD) - - -def get_optimizer_cls(optimizer_name: str) -> Type[Optimizer]: - """ - Get optimizer class from registry. - Args: optimizer_name: Name of the optimizer to retrieve. - Returns: Optimizer class. - Raises: ValueError: If optimizer name is not found in registry. - """ - optimizer_cls = registry.get_optimizer(optimizer_name) - if optimizer_cls is None: - raise ValueError(f"Unknown optimizer: {optimizer_name}") - return optimizer_cls - - -def get_optimizer(opt_config): - """ - Create optimizer from config. - Args: opt_config: Dictionary containing optimizer configuration. - Returns: Tuple of optimizer class and its arguments. - """ - opt_name = opt_config.pop("optimizer_name") - opt_cls = get_optimizer_cls(opt_name) - opt_config["lr"] = float(opt_config["lr"]) - optimizer_cls_and_kwargs = (opt_cls, opt_config) - return optimizer_cls_and_kwargs From 13e327434c4243310b3cca38298138ac5a929d55 Mon Sep 17 00:00:00 2001 From: Tanisha Chawada Date: Thu, 11 Dec 2025 13:03:17 +0530 Subject: [PATCH 13/14] Update callbacks.py Signed-off-by: Tanisha Chawada --- .../finetune/experimental/core/callbacks.py | 199 ------------------ 1 file changed, 199 deletions(-) diff --git a/QEfficient/finetune/experimental/core/callbacks.py b/QEfficient/finetune/experimental/core/callbacks.py index 30659e3bb..d647b73a6 100644 --- a/QEfficient/finetune/experimental/core/callbacks.py +++ b/QEfficient/finetune/experimental/core/callbacks.py @@ -4,202 +4,3 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- - -import json -import os -from typing import Any, Dict, Optional - -from transformers import ( - DefaultFlowCallback, - EarlyStoppingCallback, - PrinterCallback, - ProgressCallback, - TrainingArguments, -) -from transformers.integrations.integration_utils import TensorBoardCallback -from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState - -from QEfficient.finetune.experimental.core.component_registry import registry -from QEfficient.finetune.experimental.core.utils.profiler_utils import ( - get_op_verifier_ctx, - init_qaic_profiling, - stop_qaic_profiling, -) - -registry.callback("early_stopping")(EarlyStoppingCallback) -registry.callback("printer")(PrinterCallback) -registry.callback("default_flow")(DefaultFlowCallback) -registry.callback("tensorboard")(TensorBoardCallback) - - -@registry.callback("enhanced_progressbar") -class EnhancedProgressCallback(ProgressCallback): - """ - A [`TrainerCallback`] that displays the progress of training or evaluation. - You can modify `max_str_len` to control how long strings are truncated when logging. - """ - - def __init__(self, *args, **kwargs): - """ - Initialize the callback with optional max_str_len parameter to control string truncation length. - - Args: - max_str_len (`int`): - Maximum length of strings to display in logs. - Longer strings will be truncated with a message. 
- """ - super().__init__(*args, **kwargs) - - def on_train_begin(self, args, state, control, **kwargs): - """Set progress bar description at the start of training.""" - super().on_train_begin(args, state, control, **kwargs) - if self.training_bar is not None: - self.training_bar.set_description("Training Progress") - - def on_log(self, args, state, control, logs=None, **kwargs): - """ - Override the default `on_log` behavior during training to display - the current epoch number, loss, and learning rate in the logs. - """ - if state.is_world_process_zero and self.training_bar is not None: - # make a shallow copy of logs so we can mutate the fields copied - # but avoid doing any value pickling. - shallow_logs = {} - for k, v in logs.items(): - if isinstance(v, str) and len(v) > self.max_str_len: - shallow_logs[k] = ( - f"[String too long to display, length: {len(v)} > {self.max_str_len}. " - "Consider increasing `max_str_len` if needed.]" - ) - else: - shallow_logs[k] = v - _ = shallow_logs.pop("total_flos", None) - # round numbers so that it looks better in console - if "epoch" in shallow_logs: - shallow_logs["epoch"] = round(shallow_logs["epoch"], 2) - - updated_dict = {} - if "epoch" in shallow_logs: - updated_dict["epoch"] = shallow_logs["epoch"] - if "loss" in shallow_logs: - updated_dict["loss"] = shallow_logs["loss"] - if "learning_rate" in shallow_logs: - updated_dict["lr"] = shallow_logs["learning_rate"] - self.training_bar.set_postfix(updated_dict) - - -@registry.callback("json_logger") -class JSONLoggerCallback(TrainerCallback): - """ - A [`TrainerCallback`] that logs training and evaluation metrics to a JSON file. - """ - - def __init__(self, log_path=None, *args, **kwargs): - """ - Initialize the callback with the path to the JSON log file. - - Args: - log_path (`str`): - Path to the jsonl file where logs will be saved. - """ - super().__init__(*args, **kwargs) - if log_path is None: - log_path = os.path.join(os.environ.get("OUTPUT_DIR", "./"), "training_logs.jsonl") - self.log_path = log_path - # Ensure the log file is created and empty - with open(self.log_path, "w") as _: - pass - - def on_log( - self, - args: TrainingArguments, - state: TrainerState, - control: TrainerControl, - logs: Optional[Dict] = None, - **kwargs, - ): - """Append sanitized log metrics (including global_step) to a JSONL file.""" - if logs is None: - return - logs.pop("entropy", None) - logs.pop("mean_token_accuracy", None) - if state.global_step: - logs["global_step"] = state.global_step - if logs is not None: - with open(self.log_path, "a") as f: - json_line = json.dumps(logs, separators=(",", ":")) - f.write(json_line + "\n") - - -@registry.callback("qaic_profiler_callback") -class QAICProfilerCallback(TrainerCallback): - """Callback to profile QAIC devices over a specified training step range.""" - - def __init__(self, *args, **kwargs): - """ - Initialize QAIC profiler settings (start/end steps and target device IDs). - """ - - self.start_step = kwargs.get("start_step", -1) - self.end_step = kwargs.get("end_step", -1) - self.device_ids = kwargs.get("device_ids", [0]) - - def on_step_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): - """ - Event called at the beginning of a training step. If using gradient accumulation, one training step might take - several inputs. 
-        several inputs.
-        """
-        if state.global_step == self.start_step:
-            for device_id in self.device_ids:
-                init_qaic_profiling(True, f"qaic:{device_id}")
-        elif state.global_step == self.end_step:
-            for device_id in self.device_ids:
-                stop_qaic_profiling(True, f"qaic:{device_id}")
-
-
-@registry.callback("qaic_op_by_op_verifier_callback")
-class QAICOpByOpVerifierCallback(TrainerCallback):
-    """Callback to verify QAIC operations step-by-step during a specified training range."""
-
-    def __init__(self, *args, **kwargs):
-        """ "
-        Initialize QAIC Op-by-Op verifier callback with profiling and tolerance settings.
-        """
-        self.start_step = kwargs.get("start_step", -1)
-        self.end_step = kwargs.get("end_step", -1)
-        self.trace_dir = kwargs.get("trace_dir", "qaic_op_by_op_traces")
-        self.atol = kwargs.get("atol", 1e-1)
-        self.rtol = kwargs.get("rtol", 1e-5)
-
-    def on_step_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
-        """
-        Event called at the beginning of a training step. If using gradient accumulation, one training step might take
-        several inputs.
-        """
-        if self.start_step <= state.global_step < self.end_step:
-            self.op_verifier_ctx_step = get_op_verifier_ctx(
-                use_op_by_op_verifier=True,
-                device_type="qaic",
-                dump_dir=self.trace_dir,
-                step=state.global_step,
-                atol=self.atol,
-                rtol=self.rtol,
-            )
-            self.op_verifier_ctx_step.__enter__()
-
-    def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
-        """
-        Event called at the end of a training step. If using gradient accumulation, one training step might take
-        several inputs.
-        """
-        if self.start_step <= state.global_step < self.end_step:
-            if self.op_verifier_ctx_step is not None:
-                self.op_verifier_ctx_step.__exit__(None, None, None)
-
-
-def create_callbacks(name: str, **kwargs) -> Any:
-    """Create a callback instance."""
-    callback_class = registry.get_callback(name)
-    if callback_class is None:
-        raise ValueError(f"Unknown callback: {name}. Available: {registry.list_callbacks()}")
-    return callback_class(**kwargs)

From 54c3bbaf32cb5e71cc0f7740713eaff2036d7daf Mon Sep 17 00:00:00 2001
From: Tanisha Chawada
Date: Thu, 11 Dec 2025 14:44:24 +0530
Subject: [PATCH 14/14] Update config_manager.py

Signed-off-by: Tanisha Chawada
---
 QEfficient/finetune/experimental/core/config_manager.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/QEfficient/finetune/experimental/core/config_manager.py b/QEfficient/finetune/experimental/core/config_manager.py
index b28c2e1e3..244967f39 100644
--- a/QEfficient/finetune/experimental/core/config_manager.py
+++ b/QEfficient/finetune/experimental/core/config_manager.py
@@ -602,6 +602,7 @@ def validate_config(self) -> None:
 
         cfg = self.config
         model = getattr(cfg, "model", {})
+        optimizers = getattr(cfg, "optimizers", {})
         dataset = getattr(cfg, "dataset", {})
         training = getattr(cfg, "training", {})
 
@@ -683,7 +684,8 @@ def validate_config(self) -> None:
                 backend not in {"qccl", "nccl", "gloo", None},
                 "training.ddp_config.ddp_backend must be one of {'qccl','nccl','gloo'} or omitted.",
             )
-
+        # ---------- Optimizers ----------
+        self._push(errors, float(optimizers.get("lr", 0)) <= 0, "optimizers.lr must be positive.")
         # ---------- Final ----------
         if errors:
             # Join messages with bullet points for readability
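
Taken together, the config patches in this series are driven roughly as follows. This is
a minimal sketch rather than part of the patches themselves: the YAML path is
hypothetical, and any file shaped like tests/test_config.yaml should work.

    from QEfficient.finetune.experimental.core.config_manager import ConfigManager, parse_arguments

    # Build a MasterConfig from the dataclass defaults (no CLI args), then overlay YAML values.
    master_config = parse_arguments(args=[])
    config_manager = ConfigManager(master_config)
    config_manager.load_config("path/to/my_config.yaml")  # hypothetical path

    # validate_config() accumulates every failed check and raises a single ValueError
    # listing them as "- <message>" bullets; it returns nothing when the config is valid.
    config_manager.validate_config()

    # Section accessors return plain dicts (MasterConfig is round-tripped through
    # asdict() inside parse_arguments), so presence checks use key membership.
    training_config = config_manager.get_training_config()
    optimizer_config = config_manager.get_optimizer_config()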