diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index e1948205264a..f808c67ac308 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -5626,7 +5626,7 @@ def get_batch_samples(
             # In the DataParallel case, convert the scalar tensor into a 1-dim tensor
             num_items_in_batch = num_items_in_batch.unsqueeze(0)
             # Divide by number of devices with the same batch
-            if pc := self.accelerator.parallelism_config:
+            if pc := getattr(self.accelerator, "parallelism_config", None):
                 num_items_in_batch = num_items_in_batch // pc.non_data_parallel_size
 
         return batch_samples, num_items_in_batch