From 199217c1ced6814d040b37bd6c592c64f67c4307 Mon Sep 17 00:00:00 2001 From: Charly21r Date: Mon, 23 Mar 2026 16:39:14 +0100 Subject: [PATCH 1/6] Fix NotebookProgressCallback to allow evaluate() before and after train --- src/transformers/utils/notebook.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index ecbe8271fe13..4309c3966e2a 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -351,7 +351,10 @@ def on_log(self, args, state, control, logs=None, **kwargs): tt.write_line(values) def on_evaluate(self, args, state, control, metrics=None, **kwargs): - tt = _require(self.training_tracker, "on_train_begin must be called before on_evaluate") + if self.training_tracker is None: + return control + + tt = self.training_tracker values = {"Training Loss": "No log", "Validation Loss": "No log"} for log in reversed(state.log_history): From 2e20fcea3b91f4bef141c4945295cc95a3417158 Mon Sep 17 00:00:00 2001 From: Charly21r Date: Mon, 23 Mar 2026 16:40:44 +0100 Subject: [PATCH 2/6] Add unit test for NotebookProgressCallback evaluating before and after training --- tests/trainer/test_trainer_callback.py | 71 ++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index 0d132a9051f5..c640a4f43814 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -1269,3 +1269,74 @@ def state(self): self.assertEqual(instance.name, "test") self.assertEqual(instance.counter, 5) + + +@require_torch +class NotebookProgressCallbackTest(unittest.TestCase): + """Tests for NotebookProgressCallback behavior in notebook environments.""" + + def setUp(self): + self.output_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.output_dir) + + def _create_trainer(self): + train_dataset = RegressionDataset(length=16) + eval_dataset = RegressionDataset(length=16) + config = RegressionModelConfig(a=0, b=0) + model = RegressionPreTrainedModel(config) + + args = TrainingArguments( + self.output_dir, + per_device_train_batch_size=2, + per_device_eval_batch_size=2, + num_train_epochs=1, + logging_strategy="no", + report_to=[], + eval_strategy="epoch", + disable_tqdm=True, + ) + + from transformers.utils.notebook import NotebookProgressCallback + + trainer = Trainer( + model=model, + args=args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + callbacks=[NotebookProgressCallback()], # force it + ) + return trainer + + def test_evaluate_before_training(self): + """Calling evaluate() before training does not crash and returns metrics.""" + trainer = self._create_trainer() + metrics = trainer.evaluate() + self.assertIn("eval_loss", metrics) + # Check that the notebook callback exists in callback handler + from transformers.utils.notebook import NotebookProgressCallback + + cb = next( + (c for c in trainer.callback_handler.callbacks if isinstance(c, NotebookProgressCallback)), + None, + ) + self.assertIsNotNone(cb) + + def test_evaluate_after_training(self): + """Calling evaluate() after training does not crash and returns metrics.""" + trainer = self._create_trainer() + trainer.train() + metrics = trainer.evaluate() + self.assertIn("eval_loss", metrics) + + def test_multiple_evaluate_calls(self): + """Calling evaluate() multiple times in a row works in notebook environment.""" + trainer = self._create_trainer() + metrics1 = trainer.evaluate() + trainer.train() + metrics2 = trainer.evaluate() + metrics3 = trainer.evaluate() + self.assertIn("eval_loss", metrics1) + self.assertIn("eval_loss", metrics2) + self.assertIn("eval_loss", metrics3) From cb1df8d89857378a7c381d7f028df4c0116fbafc Mon Sep 17 00:00:00 2001 From: Charly21r Date: Mon, 23 Mar 2026 18:06:01 +0100 Subject: [PATCH 3/6] Skip NotebookProgressCallback tests when IPython is not installed --- tests/trainer/test_trainer_callback.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index c640a4f43814..a22faac2588f 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -23,6 +23,7 @@ - Built-in callbacks (DefaultFlowCallback, EarlyStoppingCallback, etc.) """ +import importlib.util import os import shutil import tempfile @@ -53,6 +54,9 @@ from .trainer_test_utils import RegressionDataset, RegressionModelConfig, RegressionPreTrainedModel +IPYTHON_AVAILABLE = importlib.util.find_spec("IPython") is not None + + # ============================================================================= # Test Callback Implementations # ============================================================================= @@ -1272,6 +1276,7 @@ def state(self): @require_torch +@unittest.skipUnless(IPYTHON_AVAILABLE, "IPython is required for NotebookProgressCallback") class NotebookProgressCallbackTest(unittest.TestCase): """Tests for NotebookProgressCallback behavior in notebook environments.""" From a2d67e2437e1a99999cfeff0a7ea4f0a02345c46 Mon Sep 17 00:00:00 2001 From: Charly21r Date: Tue, 7 Apr 2026 18:00:53 +0200 Subject: [PATCH 4/6] Display eval metrics when training tracker is None on NotebookProgressCallback --- src/transformers/utils/notebook.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index 4309c3966e2a..c07083f75925 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -351,10 +351,7 @@ def on_log(self, args, state, control, logs=None, **kwargs): tt.write_line(values) def on_evaluate(self, args, state, control, metrics=None, **kwargs): - if self.training_tracker is None: - return control - - tt = self.training_tracker + self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step" values = {"Training Loss": "No log", "Validation Loss": "No log"} for log in reversed(state.log_history): @@ -384,11 +381,18 @@ def on_evaluate(self, args, state, control, metrics=None, **kwargs): # Single dataset name = "Validation Loss" values[name] = v - tt.write_line(values) - tt.remove_child() + + if self.training_tracker is not None: + tt = self.training_tracker + tt.write_line(values) + tt.remove_child() + # Evaluation takes a long time so we should force the next update. + self._force_next_update = True + else: + # No training tracker, but still show the metrics + disp.display(disp.HTML(text_to_html_table([list(values.keys()), list(values.values())]))) + self.prediction_bar = None - # Evaluation takes a long time so we should force the next update. - self._force_next_update = True def on_train_end(self, args, state, control, **kwargs): tt = _require(self.training_tracker, "on_train_begin must be called before on_train_end") From 7a01ca96670dc9d8dd752d30844f8e8b8926b328 Mon Sep 17 00:00:00 2001 From: Charly21r Date: Thu, 9 Apr 2026 20:09:43 +0200 Subject: [PATCH 5/6] Add is_ipython_available and require_ipython test decorator --- src/transformers/testing_utils.py | 6 ++++++ src/transformers/utils/__init__.py | 1 + src/transformers/utils/import_utils.py | 5 +++++ tests/trainer/test_trainer_callback.py | 8 ++------ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 6e35a836db16..863242a695c6 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -106,6 +106,7 @@ is_hadamard_available, is_hqq_available, is_huggingface_hub_greater_or_equal, + is_ipython_available, is_jinja_available, is_jmespath_available, is_jumanpp_available, @@ -1179,6 +1180,11 @@ def require_faiss(test_case): return unittest.skipUnless(is_faiss_available(), "test requires `faiss`")(test_case) +def require_ipython(test_case): + """Decorator marking a test that requires IPython. These tests are skipped when IPython isn't installed.""" + return unittest.skipUnless(is_ipython_available(), "test requires `IPython`")(test_case) + + def require_optuna(test_case): """ Decorator marking a test that requires optuna. diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 3f5c7cac386b..d12e0b277c1b 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -150,6 +150,7 @@ is_hqq_available, is_huggingface_hub_greater_or_equal, is_in_notebook, + is_ipython_available, is_jinja_available, is_jmespath_available, is_jumanpp_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 1e1ac2545f05..de11d23cbecf 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -1540,6 +1540,11 @@ def msg_callable(): torch._check_with(error_type, cond, msg_callable) +@lru_cache +def is_ipython_available() -> bool: + return importlib.util.find_spec("IPython") is not None + + @lru_cache def is_in_notebook() -> bool: try: diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index a22faac2588f..db0ccd56b1a1 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -23,7 +23,6 @@ - Built-in callbacks (DefaultFlowCallback, EarlyStoppingCallback, etc.) """ -import importlib.util import os import shutil import tempfile @@ -44,7 +43,7 @@ is_torch_available, ) from transformers.integrations.integration_utils import KubeflowCallback, SwanLabCallback -from transformers.testing_utils import require_torch +from transformers.testing_utils import require_ipython, require_torch from transformers.trainer_callback import CallbackHandler, ExportableState, TrainerControl @@ -54,9 +53,6 @@ from .trainer_test_utils import RegressionDataset, RegressionModelConfig, RegressionPreTrainedModel -IPYTHON_AVAILABLE = importlib.util.find_spec("IPython") is not None - - # ============================================================================= # Test Callback Implementations # ============================================================================= @@ -1276,7 +1272,7 @@ def state(self): @require_torch -@unittest.skipUnless(IPYTHON_AVAILABLE, "IPython is required for NotebookProgressCallback") +@require_ipython class NotebookProgressCallbackTest(unittest.TestCase): """Tests for NotebookProgressCallback behavior in notebook environments.""" From 2d987166756408268d910f42a98ff68886a161d5 Mon Sep 17 00:00:00 2001 From: Charly21r Date: Fri, 10 Apr 2026 17:30:09 +0200 Subject: [PATCH 6/6] Filter model_preparation_time metric and add code comments in on_eval --- src/transformers/utils/notebook.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index c07083f75925..1c7fb7a77bea 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -351,6 +351,8 @@ def on_log(self, args, state, control, logs=None, **kwargs): tt.write_line(values) def on_evaluate(self, args, state, control, metrics=None, **kwargs): + # Recompute first_column here since on_evaluate can be called before on_train_begin, + # where it is normally initialized. self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step" values = {"Training Loss": "No log", "Validation Loss": "No log"} @@ -374,6 +376,8 @@ def on_evaluate(self, args, state, control, metrics=None, **kwargs): _ = metrics.pop(f"{metric_key_prefix}_runtime", None) _ = metrics.pop(f"{metric_key_prefix}_samples_per_second", None) _ = metrics.pop(f"{metric_key_prefix}_steps_per_second", None) + _ = metrics.pop(f"{metric_key_prefix}_model_preparation_time", None) + for k, v in metrics.items(): splits = k.split("_") name = " ".join([part.capitalize() for part in splits[1:]])