From 77ef609c4faa690fcbeea660904b3210ad31c491 Mon Sep 17 00:00:00 2001 From: Nouamane Tazi Date: Sun, 12 Jun 2022 12:17:47 +0000 Subject: [PATCH 1/4] avoid loading TFPreTrainedModel to check model's type --- src/evaluate/evaluator.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/evaluate/evaluator.py b/src/evaluate/evaluator.py index 7e6b5b258..7df06fef4 100644 --- a/src/evaluate/evaluator.py +++ b/src/evaluate/evaluator.py @@ -28,7 +28,7 @@ SCIPY_AVAILABLE = False try: - from transformers import Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel, pipeline + from transformers import Pipeline, PreTrainedTokenizer, pipeline from transformers.pipelines import SUPPORTED_TASKS as SUPPORTED_PIPELINE_TASKS from transformers.pipelines import TASK_ALIASES from transformers.pipelines import check_task as check_pipeline_task @@ -244,10 +244,9 @@ def compute( ) # Prepare pipeline. - if ( - isinstance(model_or_pipeline, PreTrainedModel) - or isinstance(model_or_pipeline, TFPreTrainedModel) - or isinstance(model_or_pipeline, str) + if isinstance(model_or_pipeline, str) or ( + hasattr(model_or_pipeline, "__class__") + and model_or_pipeline.__class__.__module__.startswith("transformers.models") ): pipe = pipeline(self.task, model=model_or_pipeline, tokenizer=tokenizer) else: From 38174b0b40f3037dd26d9b4a4cefd96c9980ce17 Mon Sep 17 00:00:00 2001 From: Nouamane Tazi Date: Mon, 13 Jun 2022 08:35:22 +0000 Subject: [PATCH 2/4] fix compute() for user-defined models --- src/evaluate/evaluator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/evaluate/evaluator.py b/src/evaluate/evaluator.py index 7df06fef4..b53c7134f 100644 --- a/src/evaluate/evaluator.py +++ b/src/evaluate/evaluator.py @@ -246,7 +246,10 @@ def compute( # Prepare pipeline. if isinstance(model_or_pipeline, str) or ( hasattr(model_or_pipeline, "__class__") - and model_or_pipeline.__class__.__module__.startswith("transformers.models") + and any( + cls_name in [parent_cls.__name__ for parent_cls in model_or_pipeline.__class__.__mro__] + for cls_name in ["PreTrainedModel", "TFPreTrainedModel"] + ) ): pipe = pipeline(self.task, model=model_or_pipeline, tokenizer=tokenizer) else: From d6954f0390ff0384bab6991754a7fc8df63967db Mon Sep 17 00:00:00 2001 From: Nouamane Tazi Date: Thu, 30 Jun 2022 20:56:37 +0000 Subject: [PATCH 3/4] avoid importing `transformers.TFPreTrainedModel` unless necessary --- src/evaluate/evaluator.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/evaluate/evaluator.py b/src/evaluate/evaluator.py index b53c7134f..87838ea21 100644 --- a/src/evaluate/evaluator.py +++ b/src/evaluate/evaluator.py @@ -28,6 +28,7 @@ SCIPY_AVAILABLE = False try: + import transformers from transformers import Pipeline, PreTrainedTokenizer, pipeline from transformers.pipelines import SUPPORTED_TASKS as SUPPORTED_PIPELINE_TASKS from transformers.pipelines import TASK_ALIASES @@ -244,12 +245,10 @@ def compute( ) # Prepare pipeline. - if isinstance(model_or_pipeline, str) or ( - hasattr(model_or_pipeline, "__class__") - and any( - cls_name in [parent_cls.__name__ for parent_cls in model_or_pipeline.__class__.__mro__] - for cls_name in ["PreTrainedModel", "TFPreTrainedModel"] - ) + if ( + isinstance(model_or_pipeline, str) + or isinstance(model_or_pipeline, transformers.PreTrainedModel) + or isinstance(model_or_pipeline, transformers.TFPreTrainedModel) ): pipe = pipeline(self.task, model=model_or_pipeline, tokenizer=tokenizer) else: From 4720a26cae8547f431af5c1f97cf502d3bca627f Mon Sep 17 00:00:00 2001 From: Nouamane Tazi Date: Fri, 15 Jul 2022 21:59:56 +0200 Subject: [PATCH 4/4] remove unused imports and small refactor - avoids importing xxPreTrainedModel when checking instance --- src/evaluate/evaluator/base.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/evaluate/evaluator/base.py b/src/evaluate/evaluator/base.py index c33a09167..e3fa8f67d 100644 --- a/src/evaluate/evaluator/base.py +++ b/src/evaluate/evaluator/base.py @@ -28,15 +28,8 @@ SCIPY_AVAILABLE = False try: - from transformers import ( - FeatureExtractionMixin, - Pipeline, - PreTrainedModel, - PreTrainedTokenizer, - PreTrainedTokenizerBase, - TFPreTrainedModel, - pipeline, - ) + import transformers + from transformers import FeatureExtractionMixin, pipeline TRANSFORMERS_AVAILABLE = True except ImportError: @@ -121,10 +114,12 @@ def predictions_processor(self, *args, **kwargs): def compute( self, - model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"] = None, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, data: Union[str, Dataset] = None, metric: Union[str, EvaluationModule] = None, - tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None, strategy: Literal["simple", "bootstrap"] = "simple", confidence_level: float = 0.95, @@ -198,9 +193,9 @@ def prepare_data(self, data: Union[str, Dataset], input_column: str, label_colum def prepare_pipeline( self, - model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"], - tokenizer: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, - feature_extractor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, + model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"], # noqa: F821 + tokenizer: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 + feature_extractor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 ): """ Prepare pipeline. @@ -220,9 +215,9 @@ def prepare_pipeline( The initialized pipeline. """ if ( - isinstance(model_or_pipeline, PreTrainedModel) - or isinstance(model_or_pipeline, TFPreTrainedModel) - or isinstance(model_or_pipeline, str) + isinstance(model_or_pipeline, str) + or isinstance(model_or_pipeline, transformers.PreTrainedModel) + or isinstance(model_or_pipeline, transformers.TFPreTrainedModel) ): pipe = pipeline( self.task, model=model_or_pipeline, tokenizer=tokenizer, feature_extractor=feature_extractor