From f6c80060bdee836f4f6643ee38a6b7e5f535cc8a Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Wed, 25 May 2022 13:38:01 -0400 Subject: [PATCH 1/7] Customed save checkpoints for transformers trainer classes; refactoring --- .../transformers/question_answering.py | 2 +- .../transformers/sparsification/__init__.py | 1 + .../sparsification/question_answering.py | 6 +-- .../transformers/sparsification/trainer.py | 38 ++++++++++++++++++- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/sparseml/transformers/question_answering.py b/src/sparseml/transformers/question_answering.py index 0dc9ccd70f6..9b6d9e4ebfe 100644 --- a/src/sparseml/transformers/question_answering.py +++ b/src/sparseml/transformers/question_answering.py @@ -40,7 +40,6 @@ EvalPrediction, HfArgumentParser, PreTrainedTokenizerFast, - TrainingArguments, default_data_collator, set_seed, ) @@ -51,6 +50,7 @@ from sparseml.transformers.sparsification import ( QuestionAnsweringTrainer, postprocess_qa_predictions, + TrainingArguments, ) from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src diff --git a/src/sparseml/transformers/sparsification/__init__.py b/src/sparseml/transformers/sparsification/__init__.py index 61a91e00a04..b8735ea4c3d 100644 --- a/src/sparseml/transformers/sparsification/__init__.py +++ b/src/sparseml/transformers/sparsification/__init__.py @@ -21,3 +21,4 @@ from .question_answering import * from .trainer import * +from .training_args import * diff --git a/src/sparseml/transformers/sparsification/question_answering.py b/src/sparseml/transformers/sparsification/question_answering.py index c908074ba71..ce40b587637 100644 --- a/src/sparseml/transformers/sparsification/question_answering.py +++ b/src/sparseml/transformers/sparsification/question_answering.py @@ -31,10 +31,10 @@ import numpy as np from torch.nn import Module from tqdm.auto import tqdm -from transformers import Trainer, is_torch_tpu_available +from transformers import 
is_torch_tpu_available from transformers.trainer_utils import PredictionOutput -from sparseml.transformers.sparsification.trainer import TrainerInterface +from sparseml.transformers.sparsification.trainer import TrainerInterface, TransformersTrainer if is_torch_tpu_available(): @@ -51,7 +51,7 @@ _LOGGER = logging.getLogger(__name__) -class _QuestionAnsweringTrainer(Trainer): +class _QuestionAnsweringTrainer(TransformersTrainer): """ Trainer implementation for Question-Answering processing """ diff --git a/src/sparseml/transformers/sparsification/trainer.py b/src/sparseml/transformers/sparsification/trainer.py index 2d0dcd1716e..5230150b8cf 100644 --- a/src/sparseml/transformers/sparsification/trainer.py +++ b/src/sparseml/transformers/sparsification/trainer.py @@ -28,7 +28,8 @@ import torch from torch import distributed as dist from torch.nn import Module -from transformers import Trainer as TransformersTrainer +from torch.utils.data import RandomSampler +from transformers import Trainer as _TransformersTrainer from transformers import TrainerCallback, TrainerControl, TrainingArguments from transformers.file_utils import WEIGHTS_NAME from transformers.integrations import TensorBoardCallback @@ -51,6 +52,7 @@ "TrainerInterface", "Trainer", "DisableHalfPrecisionCallback", + "TransformersTrainer" ] @@ -855,6 +857,38 @@ def _generate_apply_manager_params(self, kwargs) -> Tuple[Optional[str], float]: return checkpoint, epoch +class TransformersTrainer(_TransformersTrainer): + """ + A transformers trainer class with customed behaviors that can be shared + by all trainers inside SparseML + """ + def _save_checkpoint(self, model, trial, metrics=None): + super()._save_checkpoint(model, trial, metrics=metrics) + if ( + self.args.metric_for_best_model is None + or self.args.best_model_after_epoch is None + ): + return + + if (self.state.epoch > self.args.best_model_after_epoch): + metric_to_check = self.args.metric_for_best_model + if not metric_to_check.startswith("eval_"): 
+ metric_to_check = f"eval_{metric_to_check}" + metric_value = metrics[metric_to_check] + + operator = np.greater if self.args.greater_is_better else np.less + if ( + self.state.best_metric is None + or self.state.best_model_checkpoint is None + or operator(metric_value, self.state.best_metric) + ): + self.state.best_metric = metric_value + self.state.best_model_checkpoint = output_dir + else: + self.state.best_metric = None + self.state.best_model_checkpoint = None + + class Trainer(TrainerInterface, TransformersTrainer): """ Training implementation for running sparsification recipes with transformers flows. @@ -924,7 +958,7 @@ def _remove_unused_columns( self._signature_columns += ["label", "label_ids"] return super()._remove_unused_columns(dataset, description) - + class DisableHalfPrecisionCallback(TrainerCallback): """ From 725edc572fd7ae756d67cc2d6eb13373f198d780 Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Wed, 25 May 2022 13:54:12 -0400 Subject: [PATCH 2/7] Add transformers training args for SparseML --- .../sparsification/training_args.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/sparseml/transformers/sparsification/training_args.py diff --git a/src/sparseml/transformers/sparsification/training_args.py b/src/sparseml/transformers/sparsification/training_args.py new file mode 100644 index 00000000000..19205020565 --- /dev/null +++ b/src/sparseml/transformers/sparsification/training_args.py @@ -0,0 +1,18 @@ +from transformers import TrainingArguments as HFTrainingArgs + +__all__ = ["TrainingArguments"] + +@dataclass +class TrainingArguments(HFTrainingArgs): + """ + Training arguments specific to SparseML Transformers workflow + + :param best_model_after_epoch (`int`, *optional*, defaults to None): + The epoch after which best model will be saved; used in conjunction with `load_best_model_at_end` and + `metric_for_best_model` training arguments + """ + best_model_after_epoch: int = field( + default=None, + 
metadata={"help": "Epoch after which best model will be saved."}, + ) + From 91546968c7e5ce2e5d8ddde3a355c094953e45da Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Fri, 3 Jun 2022 00:36:50 -0400 Subject: [PATCH 3/7] Reuse removed unused columns --- .../sparsification/question_answering.py | 29 +------ .../transformers/sparsification/trainer.py | 81 ++++++++++--------- 2 files changed, 45 insertions(+), 65 deletions(-) diff --git a/src/sparseml/transformers/sparsification/question_answering.py b/src/sparseml/transformers/sparsification/question_answering.py index ce40b587637..1950607c9e0 100644 --- a/src/sparseml/transformers/sparsification/question_answering.py +++ b/src/sparseml/transformers/sparsification/question_answering.py @@ -34,7 +34,10 @@ from transformers import is_torch_tpu_available from transformers.trainer_utils import PredictionOutput -from sparseml.transformers.sparsification.trainer import TrainerInterface, TransformersTrainer +from sparseml.transformers.sparsification.trainer import ( + TrainerInterface, + TransformersTrainer, +) if is_torch_tpu_available(): @@ -224,30 +227,6 @@ def __init__( **kwargs, ) - def _remove_unused_columns( - self, dataset: "datasets.Dataset", description: Optional[str] = None - ): - if ( - self._signature_columns is None - and self.teacher is not None - and self.teacher not in ("disable", "self") - ): - model_signature = inspect.signature(self.model.forward) - model_signature_columns = set(model_signature.parameters.keys()) - - teacher_signature = inspect.signature(self.teacher.forward) - teacher_signature_columns = set(teacher_signature.parameters.keys()) - - self._signature_columns = list( - model_signature_columns | teacher_signature_columns - ) - - # Labels may be named label or label_ids, the default data - # collator handles that. 
- self._signature_columns += ["label", "label_ids"] - - return super()._remove_unused_columns(dataset, description) - def postprocess_qa_predictions( examples, diff --git a/src/sparseml/transformers/sparsification/trainer.py b/src/sparseml/transformers/sparsification/trainer.py index 5230150b8cf..f39dd84bbe0 100644 --- a/src/sparseml/transformers/sparsification/trainer.py +++ b/src/sparseml/transformers/sparsification/trainer.py @@ -29,7 +29,7 @@ from torch import distributed as dist from torch.nn import Module from torch.utils.data import RandomSampler -from transformers import Trainer as _TransformersTrainer +from transformers import Trainer as HFTransformersTrainer from transformers import TrainerCallback, TrainerControl, TrainingArguments from transformers.file_utils import WEIGHTS_NAME from transformers.integrations import TensorBoardCallback @@ -52,7 +52,7 @@ "TrainerInterface", "Trainer", "DisableHalfPrecisionCallback", - "TransformersTrainer" + "TransformersTrainer", ] @@ -857,11 +857,12 @@ def _generate_apply_manager_params(self, kwargs) -> Tuple[Optional[str], float]: return checkpoint, epoch -class TransformersTrainer(_TransformersTrainer): +class TransformersTrainer(HFTransformersTrainer): """ A transformers trainer class with customed behaviors that can be shared by all trainers inside SparseML """ + def _save_checkpoint(self, model, trial, metrics=None): super()._save_checkpoint(model, trial, metrics=metrics) if ( @@ -870,7 +871,7 @@ def _save_checkpoint(self, model, trial, metrics=None): ): return - if (self.state.epoch > self.args.best_model_after_epoch): + if self.state.epoch > self.args.best_model_after_epoch: metric_to_check = self.args.metric_for_best_model if not metric_to_check.startswith("eval_"): metric_to_check = f"eval_{metric_to_check}" @@ -887,41 +888,6 @@ def _save_checkpoint(self, model, trial, metrics=None): else: self.state.best_metric = None self.state.best_model_checkpoint = None - - -class Trainer(TrainerInterface, 
TransformersTrainer): - """ - Training implementation for running sparsification recipes with transformers flows. - :param model: the model to use with the trainer and apply sparsification to - :param model_state_path: the state path to the model, - used to load config and tokenizer settings - :param recipe: the recipe, if any, to apply to the model and training - process - :param recipe_args: A json string, csv key=value string, or dictionary containing - arguments to override the root arguments within the recipe such as - learning rate or num epochs - :param teacher: teacher model for distillation. Set to 'self' to distill - from the loaded model or 'disable' to turn off distillation - :param kwargs: key word arguments passed to the parent class - """ - - def __init__( - self, - model: Module, - model_state_path: str, - recipe: Optional[str], - recipe_args: Optional[Union[Dict[str, Any], str]] = None, - teacher: Optional[Union[Module, str]] = None, - **kwargs, - ): - super().__init__( - model=model, - model_state_path=model_state_path, - recipe=recipe, - recipe_args=recipe_args, - teacher=teacher, - **kwargs, - ) def _remove_unused_columns( self, dataset: "datasets.Dataset", description: Optional[str] = None @@ -958,7 +924,42 @@ def _remove_unused_columns( self._signature_columns += ["label", "label_ids"] return super()._remove_unused_columns(dataset, description) - + + +class Trainer(TrainerInterface, TransformersTrainer): + """ + Training implementation for running sparsification recipes with transformers flows. 
+ :param model: the model to use with the trainer and apply sparsification to + :param model_state_path: the state path to the model, + used to load config and tokenizer settings + :param recipe: the recipe, if any, to apply to the model and training + process + :param recipe_args: A json string, csv key=value string, or dictionary containing + arguments to override the root arguments within the recipe such as + learning rate or num epochs + :param teacher: teacher model for distillation. Set to 'self' to distill + from the loaded model or 'disable' to turn off distillation + :param kwargs: key word arguments passed to the parent class + """ + + def __init__( + self, + model: Module, + model_state_path: str, + recipe: Optional[str], + recipe_args: Optional[Union[Dict[str, Any], str]] = None, + teacher: Optional[Union[Module, str]] = None, + **kwargs, + ): + super().__init__( + model=model, + model_state_path=model_state_path, + recipe=recipe, + recipe_args=recipe_args, + teacher=teacher, + **kwargs, + ) + class DisableHalfPrecisionCallback(TrainerCallback): """ From 578ac0215bf325b326def7896505466dd4e55f9e Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Fri, 3 Jun 2022 10:11:12 -0400 Subject: [PATCH 4/7] Format training args --- .../sparsification/training_args.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/sparseml/transformers/sparsification/training_args.py b/src/sparseml/transformers/sparsification/training_args.py index 19205020565..e9226e3d678 100644 --- a/src/sparseml/transformers/sparsification/training_args.py +++ b/src/sparseml/transformers/sparsification/training_args.py @@ -1,7 +1,22 @@ +# Copyright (c) 2022 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from transformers import TrainingArguments as HFTrainingArgs __all__ = ["TrainingArguments"] + @dataclass class TrainingArguments(HFTrainingArgs): """ @@ -11,8 +26,8 @@ class TrainingArguments(HFTrainingArgs): The epoch after which best model will be saved; used in conjunction with `load_best_model_at_end` and `metric_for_best_model` training arguments """ + best_model_after_epoch: int = field( default=None, metadata={"help": "Epoch after which best model will be saved."}, ) - From e8d426ddca0b68f5f37a7ab3aa50a426e9393f9c Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Fri, 3 Jun 2022 22:23:09 -0400 Subject: [PATCH 5/7] Move distill teacher, recipe args --- .../transformers/question_answering.py | 27 +++-------------- .../sparsification/question_answering.py | 2 -- .../transformers/sparsification/trainer.py | 18 ++--------- .../sparsification/training_args.py | 30 +++++++++++++++++-- .../transformers/text_classification.py | 28 +++-------------- 5 files changed, 38 insertions(+), 67 deletions(-) diff --git a/src/sparseml/transformers/question_answering.py b/src/sparseml/transformers/question_answering.py index 9b6d9e4ebfe..3d6fa1705d0 100644 --- a/src/sparseml/transformers/question_answering.py +++ b/src/sparseml/transformers/question_answering.py @@ -49,8 +49,8 @@ from sparseml.transformers.sparsification import ( QuestionAnsweringTrainer, - postprocess_qa_predictions, TrainingArguments, + postprocess_qa_predictions, ) from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src @@ -90,10 +90,6 @@ class ModelArguments: ) } ) - 
distill_teacher: Optional[str] = field( - default=None, - metadata={"help": "Teacher model which needs to be a trained QA model"}, - ) config_name: Optional[str] = field( default=None, metadata={ @@ -141,21 +137,6 @@ class DataTrainingArguments: Arguments pertaining to what data to input to our model for training and eval """ - recipe: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Path to a SparseML sparsification recipe, see " - "https://github.com/neuralmagic/sparseml for more information" - ) - }, - ) - recipe_args: Optional[str] = field( - default=None, - metadata={ - "help": "Recipe arguments to be overwritten", - }, - ) dataset_name: Optional[str] = field( default=None, metadata={ @@ -445,7 +426,7 @@ def main(**kwargs): "revision": model_args.model_revision, "use_auth_token": True if model_args.use_auth_token else None, }, - teacher_name_or_path=model_args.distill_teacher, + teacher_name_or_path=training_args.distill_teacher, teacher_kwargs={ "cache_dir": model_args.cache_dir, "use_auth_token": True if model_args.use_auth_token else None, @@ -772,8 +753,8 @@ def compute_metrics(p: EvalPrediction): trainer = QuestionAnsweringTrainer( model=model, model_state_path=model_args.model_name_or_path, - recipe=data_args.recipe, - recipe_args=data_args.recipe_args, + recipe=training_args.recipe, + recipe_args=training_args.recipe_args, metadata_args=metadata_args, teacher=teacher, args=training_args, diff --git a/src/sparseml/transformers/sparsification/question_answering.py b/src/sparseml/transformers/sparsification/question_answering.py index 1950607c9e0..d2cd4778d2d 100644 --- a/src/sparseml/transformers/sparsification/question_answering.py +++ b/src/sparseml/transformers/sparsification/question_answering.py @@ -21,13 +21,11 @@ """ import collections -import inspect import json import logging import os from typing import Any, Dict, List, Optional, Tuple, Union -import datasets import numpy as np from torch.nn import Module from tqdm.auto import 
tqdm diff --git a/src/sparseml/transformers/sparsification/trainer.py b/src/sparseml/transformers/sparsification/trainer.py index f39dd84bbe0..752a43a4c40 100644 --- a/src/sparseml/transformers/sparsification/trainer.py +++ b/src/sparseml/transformers/sparsification/trainer.py @@ -864,6 +864,8 @@ class TransformersTrainer(HFTransformersTrainer): """ def _save_checkpoint(self, model, trial, metrics=None): + # Call into the save checkpoint by HF Transformers, which saves the + # best metric if required super()._save_checkpoint(model, trial, metrics=metrics) if ( self.args.metric_for_best_model is None @@ -871,21 +873,7 @@ def _save_checkpoint(self, model, trial, metrics=None): ): return - if self.state.epoch > self.args.best_model_after_epoch: - metric_to_check = self.args.metric_for_best_model - if not metric_to_check.startswith("eval_"): - metric_to_check = f"eval_{metric_to_check}" - metric_value = metrics[metric_to_check] - - operator = np.greater if self.args.greater_is_better else np.less - if ( - self.state.best_metric is None - or self.state.best_model_checkpoint is None - or operator(metric_value, self.state.best_metric) - ): - self.state.best_metric = metric_value - self.state.best_model_checkpoint = output_dir - else: + if self.state.epoch <= self.args.best_model_after_epoch: self.state.best_metric = None self.state.best_model_checkpoint = None diff --git a/src/sparseml/transformers/sparsification/training_args.py b/src/sparseml/transformers/sparsification/training_args.py index e9226e3d678..a1aa639ad87 100644 --- a/src/sparseml/transformers/sparsification/training_args.py +++ b/src/sparseml/transformers/sparsification/training_args.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - present / Neuralmagic, Inc. All Rights Reserved. +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from dataclasses import dataclass, field +from typing import Optional + from transformers import TrainingArguments as HFTrainingArgs + __all__ = ["TrainingArguments"] @@ -23,11 +27,31 @@ class TrainingArguments(HFTrainingArgs): Training arguments specific to SparseML Transformers workflow :param best_model_after_epoch (`int`, *optional*, defaults to None): - The epoch after which best model will be saved; used in conjunction with `load_best_model_at_end` and - `metric_for_best_model` training arguments + The epoch after which best model will be saved; used in conjunction + with `load_best_model_at_end` and `metric_for_best_model` training + arguments """ + distill_teacher: Optional[str] = field( + default=None, + metadata={ + "help": "Teacher model (a trained text classification model)", + }, + ) best_model_after_epoch: int = field( default=None, metadata={"help": "Epoch after which best model will be saved."}, ) + recipe: Optional[str] = field( + default=None, + metadata={ + "help": ( + "Path to a SparseML sparsification recipe, see " + "https://github.com/neuralmagic/sparseml for more information" + ), + }, + ) + recipe_args: Optional[str] = field( + default=None, + metadata={"help": "Recipe arguments to be overwritten"}, + ) diff --git a/src/sparseml/transformers/text_classification.py b/src/sparseml/transformers/text_classification.py index 17ddc8aac3f..a27cc97322d 100644 --- a/src/sparseml/transformers/text_classification.py +++ b/src/sparseml/transformers/text_classification.py @@ -42,7 +42,6 @@ EvalPrediction, HfArgumentParser, PretrainedConfig, - TrainingArguments, default_data_collator, set_seed, ) @@ -50,7 +49,7 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -from sparseml.transformers.sparsification import Trainer +from sparseml.transformers.sparsification import Trainer, 
TrainingArguments from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src @@ -94,19 +93,6 @@ class DataTrainingArguments: arguments to be able to specify them on the command line """ - recipe: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Path to a SparseML sparsification recipe, see " - "https://github.com/neuralmagic/sparseml for more information" - ), - }, - ) - recipe_args: Optional[str] = field( - default=None, - metadata={"help": "Recipe arguments to be overwritten"}, - ) task_name: Optional[str] = field( default=None, metadata={ @@ -254,12 +240,6 @@ class ModelArguments: ) } ) - distill_teacher: Optional[str] = field( - default=None, - metadata={ - "help": "Teacher model which must be a trained text classification model" - }, - ) config_name: Optional[str] = field( default=None, metadata={ @@ -481,7 +461,7 @@ def main(**kwargs): "revision": model_args.model_revision, "use_auth_token": True if model_args.use_auth_token else None, }, - teacher_name_or_path=model_args.distill_teacher, + teacher_name_or_path=training_args.distill_teacher, teacher_kwargs={ "cache_dir": model_args.cache_dir, "use_auth_token": True if model_args.use_auth_token else None, @@ -720,9 +700,9 @@ def compute_metrics(p: EvalPrediction): trainer = Trainer( model=model, model_state_path=model_args.model_name_or_path, - recipe=data_args.recipe, + recipe=training_args.recipe, metadata_args=metadata_args, - recipe_args=data_args.recipe_args, + recipe_args=training_args.recipe_args, teacher=teacher, args=training_args, data_args=data_args, From 46259198c0d00a4df22782382f98582d76d4af0f Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Fri, 10 Jun 2022 14:09:23 -0400 Subject: [PATCH 6/7] Simplify MLM, token classification code --- .../transformers/masked_language_modeling.py | 22 ++----------------- .../transformers/token_classification.py | 22 ++----------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git 
a/src/sparseml/transformers/masked_language_modeling.py b/src/sparseml/transformers/masked_language_modeling.py index 326fc03402c..3b4a7733672 100644 --- a/src/sparseml/transformers/masked_language_modeling.py +++ b/src/sparseml/transformers/masked_language_modeling.py @@ -47,14 +47,13 @@ AutoTokenizer, DataCollatorForLanguageModeling, HfArgumentParser, - TrainingArguments, set_seed, ) from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version from transformers.utils.versions import require_version -from sparseml.transformers.sparsification import Trainer +from sparseml.transformers.sparsification import Trainer, TrainingArguments from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src @@ -108,10 +107,6 @@ class ModelArguments: "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" }, ) - distill_teacher: Optional[str] = field( - default=None, - metadata={"help": "Teacher model which needs to be a trained QA model"}, - ) config_name: Optional[str] = field( default=None, metadata={ @@ -164,19 +159,6 @@ class DataTrainingArguments: training and eval """ - recipe: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Path to a SparseML sparsification recipe, see " - "https://github.com/neuralmagic/sparseml for more information" - ), - }, - ) - recipe_args: Optional[str] = field( - default=None, - metadata={"help": "Recipe arguments to be overwritten"}, - ) dataset_name: Optional[str] = field( default=None, metadata={"help": "The name of the dataset to use (via the datasets library)"}, @@ -490,7 +472,7 @@ def main(**kwargs): "revision": model_args.model_revision, "use_auth_token": True if model_args.use_auth_token else None, }, - teacher_name_or_path=model_args.distill_teacher, + teacher_name_or_path=training_args.distill_teacher, teacher_kwargs={ "cache_dir": model_args.cache_dir, "use_auth_token": True if model_args.use_auth_token else None, diff --git 
a/src/sparseml/transformers/token_classification.py b/src/sparseml/transformers/token_classification.py index c846ac50e1c..9692c4f8e3d 100644 --- a/src/sparseml/transformers/token_classification.py +++ b/src/sparseml/transformers/token_classification.py @@ -40,14 +40,13 @@ HfArgumentParser, PretrainedConfig, PreTrainedTokenizerFast, - TrainingArguments, set_seed, ) from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version from transformers.utils.versions import require_version -from sparseml.transformers.sparsification import Trainer +from sparseml.transformers.sparsification import Trainer, TrainingArguments from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src @@ -84,10 +83,6 @@ class ModelArguments: ) } ) - distill_teacher: Optional[str] = field( - default=None, - metadata={"help": "Teacher model which needs to be a trained NER model"}, - ) config_name: Optional[str] = field( default=None, metadata={ @@ -127,19 +122,6 @@ class DataTrainingArguments: training and eval """ - recipe: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Path to a SparseML sparsification recipe, see " - "https://github.com/neuralmagic/sparseml for more information" - ), - }, - ) - recipe_args: Optional[str] = field( - default=None, - metadata={"help": "Recipe arguments to be overwritten"}, - ) task_name: Optional[str] = field( default="ner", metadata={"help": "The name of the task (ner, pos...)."} ) @@ -441,7 +423,7 @@ def get_label_list(labels): "revision": model_args.model_revision, "use_auth_token": True if model_args.use_auth_token else None, }, - teacher_name_or_path=model_args.distill_teacher, + teacher_name_or_path=training_args.distill_teacher, teacher_kwargs={ "cache_dir": model_args.cache_dir, "use_auth_token": True if model_args.use_auth_token else None, From 15e8b3bc5aad62ffad73f8588c3376513129486c Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Mon, 13 Jun 2022 23:13:41 -0400 
Subject: [PATCH 7/7] Fix recipe calls in mlm and token cls --- src/sparseml/transformers/masked_language_modeling.py | 4 ++-- src/sparseml/transformers/sparsification/trainer.py | 3 +-- src/sparseml/transformers/token_classification.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/sparseml/transformers/masked_language_modeling.py b/src/sparseml/transformers/masked_language_modeling.py index 3b4a7733672..37c6ad3516e 100644 --- a/src/sparseml/transformers/masked_language_modeling.py +++ b/src/sparseml/transformers/masked_language_modeling.py @@ -664,9 +664,9 @@ def compute_metrics(eval_preds): trainer = Trainer( model=model, model_state_path=model_args.model_name_or_path, - recipe=data_args.recipe, + recipe=training_args.recipe, metadata_args=metadata_args, - recipe_args=data_args.recipe_args, + recipe_args=training_args.recipe_args, teacher=teacher, args=training_args, data_args=data_args, diff --git a/src/sparseml/transformers/sparsification/trainer.py b/src/sparseml/transformers/sparsification/trainer.py index 752a43a4c40..847c8b5b8b7 100644 --- a/src/sparseml/transformers/sparsification/trainer.py +++ b/src/sparseml/transformers/sparsification/trainer.py @@ -28,7 +28,6 @@ import torch from torch import distributed as dist from torch.nn import Module -from torch.utils.data import RandomSampler from transformers import Trainer as HFTransformersTrainer from transformers import TrainerCallback, TrainerControl, TrainingArguments from transformers.file_utils import WEIGHTS_NAME @@ -859,7 +858,7 @@ def _generate_apply_manager_params(self, kwargs) -> Tuple[Optional[str], float]: class TransformersTrainer(HFTransformersTrainer): """ - A transformers trainer class with customed behaviors that can be shared + A transformers trainer class with custom behavior that can be shared by all trainers inside SparseML """ diff --git a/src/sparseml/transformers/token_classification.py b/src/sparseml/transformers/token_classification.py index 
9692c4f8e3d..6a2764cbf89 100644 --- a/src/sparseml/transformers/token_classification.py +++ b/src/sparseml/transformers/token_classification.py @@ -625,9 +625,9 @@ def compute_metrics(p): trainer = Trainer( model=model, model_state_path=model_args.model_name_or_path, - recipe=data_args.recipe, + recipe=training_args.recipe, metadata_args=metadata_args, - recipe_args=data_args.recipe_args, + recipe_args=training_args.recipe_args, teacher=teacher, args=training_args, data_args=data_args,