diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py index 12fc92609..e84ffdf71 100644 --- a/langtest/augmentation/__init__.py +++ b/langtest/augmentation/__init__.py @@ -113,7 +113,8 @@ def fix( Returns: List[Dict[str, Any]]: A list of augmented data samples. """ - if "." not in training_data["data_source"]: + + if "source" in training_data and training_data["source"] == "huggingface": self.df = HuggingFaceDataset(training_data["data_source"], self.task) data = self.df.load_data( feature_column=training_data.get("feature_column", "text"), @@ -122,7 +123,7 @@ def fix( subset=training_data.get("subset", None), ) else: - self.df = DataFactory(training_data["data_source"], self.task) + self.df = DataFactory(training_data, self.task) data = self.df.load() TestFactory.is_augment = True supported_tests = TestFactory.test_scenarios() @@ -345,7 +346,7 @@ def fix( Returns: bool: Returns True upon successful completion of the method. """ - df = DataFactory(training_data["data_source"], self.__task) + df = DataFactory(training_data, self.__task) data = df.load() new_data = [] self.__search_results = self.search_sample_results(data) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 5de79e9d2..2bf448bcb 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -94,14 +94,21 @@ class DataFactory: correct Dataset type based on the file extension. """ - def __init__(self, file_path: str, task: str, **kwargs) -> None: + def __init__(self, file_path: dict, task: str, **kwargs) -> None: """Initializes DataFactory object. Args: - file_path (str): Path to the dataset. + file_path (dict): Dictionary containing 'data_source' key with the path to the dataset. task (str): Task to be evaluated. """ - self._file_path = file_path + if not isinstance(file_path, dict): + raise ValueError("'file_path' must be a dictionary.") + + if "data_source" not in file_path: + raise ValueError( + "The 'data_source' key must be provided in the 'file_path' dictionary." + ) + self._file_path = file_path.get("data_source") self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls for cls in _IDataset.__subclasses__() diff --git a/langtest/langtest.py b/langtest/langtest.py index 281fb2aa0..c14845661 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -3,7 +3,7 @@ import os import pickle from collections import defaultdict -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union import pandas as pd import yaml @@ -91,18 +91,18 @@ class Harness: def __init__( self, task: str, - model: Optional[Union[str, Any]] = None, - hub: Optional[str] = None, - data: Optional[Union[str, dict]] = None, + model: Optional[Union[list, dict]] = None, + data: Optional[dict] = None, config: Optional[Union[str, dict]] = None, ): """Initialize the Harness object. Args: task (str, optional): Task for which the model is to be evaluated. - model (str | ModelFactory): ModelFactory object or path to the model to be evaluated. - hub (str, optional): model hub to load from the path. Required if path is passed as 'model'. - data (str, optional): Path to the data to be used for evaluation. + model (list | dict, optional): Specifies the model to be evaluated. + If provided as a list, each element should be a dictionary with 'model' and 'hub' keys. + If provided as a dictionary, it must contain 'model' and 'hub' keys when specifying a path. + data (dict, optional): The data to be used for evaluation. config (str | dict, optional): Configuration for the tests to be performed. Raises: @@ -111,8 +111,27 @@ def __init__( super().__init__() self.is_default = False - self._actual_model = model - self.hub = hub + + if isinstance(model, list): + for item in model: + if not isinstance(item, dict): + raise ValueError("Each item in the list must be a dictionary") + if "model" not in item or "hub" not in item: + raise ValueError( + "Each dictionary in the list must have 'model' and 'hub' keys" + ) + elif isinstance(model, dict): + if "model" not in model or "hub" not in model: + raise ValueError("The dictionary must have 'model' and 'hub' keys") + else: + raise ValueError("Invalid 'model' parameter type") + + if isinstance(model, dict): + hub, model = model["hub"], model["model"] + self.hub = hub + self._actual_model = model + else: + hub = None if task not in self.SUPPORTED_TASKS: raise ValueError( @@ -133,7 +152,7 @@ def __init__( if data is None and (task, model, hub) in self.DEFAULTS_DATASET: data_path = os.path.join("data", self.DEFAULTS_DATASET[(task, model, hub)]) - data = resource_filename("langtest", data_path) + data = {"data_source": resource_filename("langtest", data_path)} self.data = DataFactory(data, task=self.task).load() if model == "textcat_imdb": model = resource_filename("langtest", "data/textcat_imdb") @@ -142,50 +161,45 @@ def __init__( elif ( isinstance(data, dict) - and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION - and task == "text-classification" + and "source" in data + and data["source"] == "huggingface" ): - self.data = ( - HuggingFaceDataset(data["name"], task=task).load_data( + if ( + task == "text-classification" + and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION + ): + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( feature_column=data.get("feature_column", "text"), target_column=data.get("target_column", "label"), split=data.get("split", "test"), subset=data.get("subset", None), ) - if data is not None - else None - ) - if hub == "spacy" and (model == "textcat_imdb" or model is None): - if model is None: - logging.warning( - "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." - ) - model = resource_filename("langtest", "data/textcat_imdb") + if hub == "spacy" and (model == "textcat_imdb" or model is None): + if model is None: + logging.warning( + "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." + ) + model = resource_filename("langtest", "data/textcat_imdb") - elif ( - isinstance(data, dict) - and hub in self.SUPPORTED_HUBS_HF_DATASET_NER - and task == "ner" - ): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "tokens"), - target_column=data.get("target_column", "ner_tags"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) + elif task == "ner" and hub in self.SUPPORTED_HUBS_HF_DATASET_NER: + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( + feature_column=data.get("feature_column", "tokens"), + target_column=data.get("target_column", "ner_tags"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) - elif ( - isinstance(data, dict) - and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION - and task == "summarization" - ): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "document"), - target_column=data.get("target_column", "summary"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) + elif ( + task == "summarization" + and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION + ): + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( + feature_column=data.get("feature_column", "document"), + target_column=data.get("target_column", "summary"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys(): raise ValueError( @@ -193,10 +207,14 @@ def __init__( "passed is not among the default ones. You need to either specify the parameter 'data' " "or use a default configuration." ) - elif isinstance(data, list): - self.data = data + elif isinstance(data["data_source"], list): + self.data = data["data_source"] else: - self.file_path = data + if "data_source" not in data: + raise ValueError( + "The 'data_source' key must be provided in the 'data' parameter." + ) + self.file_path = data["data_source"] self.data = ( DataFactory(data, task=self.task).load() if data is not None else None ) @@ -221,13 +239,19 @@ def __init__( path=model, task=task, hub=hub, **self._config.get("model_parameters", {}) ) - elif type(model) == dict: + elif isinstance(model, list): model_dict = {} - for k, v in model.items(): - model_dict[k] = ModelFactory.load_model( - task=task, path=k, hub=v, **self._config.get("model_parameters", {}) + for i in model: + model = i["model"] + hub = i["hub"] + + model_dict[model] = ModelFactory.load_model( + path=model, + task=task, + hub=hub, + **self._config.get("model_parameters", {}), ) - self.model = model_dict + self.model = model_dict else: self.model = ModelFactory( @@ -241,7 +265,7 @@ def __init__( print("Test Configuration : \n", formatted_config) global GLOBAL_MODEL - if not isinstance(model, dict): + if not isinstance(model, list): GLOBAL_MODEL = self.model self._testcases = None @@ -290,13 +314,17 @@ def configure(self, config: Union[str, dict]) -> dict: **self._config.get("model_parameters", {}), ) - elif isinstance(model, dict): + elif isinstance(model, list): model_dict = {} - for k, v in model.items(): - model_dict[k] = ModelFactory.load_model( + + for i in model: + model = i["model"] + hub = i["hub"] + + model_dict[model] = ModelFactory.load_model( + path=model, task=task, - path=k, - hub=v, + hub=hub, **self._config.get("model_parameters", {}), ) self.model = model_dict @@ -1017,9 +1045,8 @@ def load( harness = Harness( task=task, - model=model, - data=data, - hub=hub, + model={"model": model, "hub": hub}, + data={"data_source": data}, config=os.path.join(save_dir, "config.yaml"), ) harness.generate() @@ -1050,7 +1077,9 @@ def import_edited_testcases(self, input_path: str, **kwargs): if sample.category not in ["robustness", "bias"] ] - self._testcases = DataFactory(input_path, task=self.task, is_import=True).load() + self._testcases = DataFactory( + {"data_source": input_path}, task=self.task, is_import=True + ).load() self._testcases.extend(temp_testcases) return self diff --git a/langtest/pipelines/transformers/ner_pipeline.py b/langtest/pipelines/transformers/ner_pipeline.py index d4c1e7d3b..b249be34b 100644 --- a/langtest/pipelines/transformers/ner_pipeline.py +++ b/langtest/pipelines/transformers/ner_pipeline.py @@ -84,8 +84,12 @@ def setup(self): self.hub = "huggingface" self.output_dir = "checkpoints/" - self.train_datasource = DataFactory(file_path=self.train_data, task=self.task) - self.eval_datasource = DataFactory(file_path=self.eval_data, task=self.task) + self.train_datasource = DataFactory( + file_path={"data_source": self.train_data}, task=self.task + ) + self.eval_datasource = DataFactory( + file_path={"data_source": self.eval_data}, task=self.task + ) self.next(self.train) @@ -153,9 +157,8 @@ def test(self): """Performs the testing procedure of the model on a set of tests using langtest""" self.harness = Harness( task=self.task, - model=self.output_dir, - hub=self.hub, - data=self.train_data, + model={"model": self.output_dir, "hub": self.hub}, + data={"data_source": self.train_data}, ) if self.config: self.harness.configure(self.config) @@ -184,7 +187,7 @@ def augment(self): def retrain(self): """Performs the training procedure using the augmented data created by langtest""" self.augmented_train_datasource = DataFactory( - file_path=self.path_augmented_file, task=self.task + file_path={"data_source": self.path_augmented_file}, task=self.task ) samples = self.augmented_train_datasource.load_raw() diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index 4961a7bca..caba3df32 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -16,45 +16,39 @@ def setUp(self) -> None: self.params = { "spacy_ner": { "task": "ner", - "model": "en_core_web_sm", - "data": "tests/fixtures/test.conll", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "spacy", }, "huggingface_ner": { "task": "ner", - "model": "dslim/bert-base-NER", - "data": "tests/fixtures/test.conll", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "huggingface", }, "huggingface_textclassification": { "task": "text-classification", - "model": "distilbert-base-uncased", - "data": "tests/fixtures/test.conll", + "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "huggingface", }, "huggingface_textclassification_csv_dataset": { "task": "text-classification", - "model": "lvwerra/distilbert-imdb", - "data": "tests/fixtures/text_classification.csv", + "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/text_classification.csv"}, "config": "tests/fixtures/config_text_classification.yaml", - "hub": "huggingface", }, "spacy_textclassification_hf_dataset": { "task": "text-classification", - "model": "textcat_imdb", - "data": {"name": "imdb"}, + "model": {"model": "textcat_imdb", "hub": "spacy"}, + "data": {"data_source": "imdb", "source": "huggingface"}, "config": "tests/fixtures/config_text_classification.yaml", - "hub": "spacy", }, "huggingface_textclassification_hf_dataset": { "task": "text-classification", - "model": "lvwerra/distilbert-imdb", - "data": {"name": "imdb"}, + "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, + "data": {"data_source": "imdb", "source": "huggingface"}, "config": "tests/fixtures/config_text_classification.yaml", - "hub": "huggingface", }, } @@ -208,7 +202,7 @@ def test_hf_dataset_textclassification_hf(self): self.assertIsInstance(report, pd.DataFrame) custom_proportions = {"uppercase": 0.8, "lowercase": 0.8} harness.augment( - training_data={"data_source": "imdb"}, + training_data={"data_source": "imdb", "source": "huggingface"}, save_data_path="tests/fixtures/augmented_train_transformed.csv", custom_proportions=custom_proportions, export_mode="transformed", @@ -228,7 +222,7 @@ def test_hf_dataset_textclassification_spacy(self): self.assertIsInstance(report, pd.DataFrame) custom_proportions = {"uppercase": 0.8, "lowercase": 0.8} harness.augment( - training_data={"data_source": "imdb"}, + training_data={"data_source": "imdb", "source": "huggingface"}, save_data_path="tests/fixtures/augmented_train_transformed.csv", custom_proportions=custom_proportions, export_mode="transformed", diff --git a/tests/test_harness.py b/tests/test_harness.py index 34bc9c134..36f43f146 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -18,10 +18,9 @@ def setUpClass(cls) -> None: cls.config_path = "tests/fixtures/config_ner.yaml" cls.harness = Harness( task="ner", - model="dslim/bert-base-NER", - data=cls.data_path, + model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, + data={"data_source": cls.data_path}, config=cls.config_path, - hub="huggingface", ) cls.harness.generate().run() @@ -39,8 +38,8 @@ def test_missing_parameter(self): with self.assertRaises(ValueError) as _: Harness( task="ner", - model="dslim/bert-base-NER", - data=self.data_path, + model={"model": "dslim/bert-base-NER"}, + data={"data_source": self.data_path}, config=self.config_path, ) @@ -93,10 +92,9 @@ def test_incompatible_tasks(self): with self.assertRaises(ValueError): Harness( task="text-classifer", - model="dslim/bert-base-NER", - data=self.data_path, + model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, + data={"data_source": self.data_path}, config=self.config_path, - hub="huggingface", ) def test_unsupported_test_for_task(self): @@ -106,8 +104,7 @@ def test_unsupported_test_for_task(self): with self.assertRaises(ValueError): h = Harness( task="text-classification", - model="textcat_imdb", - hub="spacy", + model={"model": "textcat_imdb", "hub": "spacy"}, config={ "tests": {"robustness": {"swap_entities": {"min_pass_rate": 0.5}}} }, @@ -146,10 +143,9 @@ def test_load_text_classification(self): save_dir = "/tmp/saved_text_classification_harness_test" tc_harness = Harness( task="text-classification", - model="bert-base-cased", - data="tests/fixtures/text_classification.csv", + model={"model": "bert-base-cased", "hub": "huggingface"}, + data={"data_source": "tests/fixtures/text_classification.csv"}, config="tests/fixtures/config_text_classification.yaml", - hub="huggingface", ) tc_harness.generate() tc_harness.save(save_dir) @@ -157,7 +153,7 @@ def test_load_text_classification(self): loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model="bert-base-uncased", + model="bert-base-cased", hub="huggingface", ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) @@ -171,9 +167,8 @@ def test_load_HF_data_text_classification(self): save_dir = "/tmp/saved_HF_data_text_classification_harness_test" tc_harness = Harness( task="text-classification", - hub="huggingface", - model="aychang/roberta-base-imdb", - data={"name": "imdb"}, + model={"model": "aychang/roberta-base-imdb", "hub": "huggingface"}, + data={"data_source": "imdb", "source": "huggingface"}, ) tc_harness.data = tc_harness.data[:10] tc_harness.generate() @@ -198,9 +193,8 @@ def test_harness_edit_import_testcases(self): harness = Harness( task="ner", - model="bert-base-cased", - data="tests/fixtures/test.conll", - hub="huggingface", + model={"model": "bert-base-cased", "hub": "huggingface"}, + data={"data_source": "tests/fixtures/test.conll"}, ) harness.data = harness.data[:10] harness.generate() @@ -233,34 +227,38 @@ def test_non_existing_default(self): Test handling of non-existing default models. """ with self.assertRaises(ValueError): - h = Harness(task="ner", model="xxxxxxxxx", hub="spacy") + h = Harness(task="ner", model={"model": "xxxxxxxxx", "hub": "spacy"}) def test_ner_spacy(self): """ Test NER task with Spacy model. """ - h = Harness(task="ner", model="en_core_web_sm", hub="spacy") + h = Harness(task="ner", model={"model": "en_core_web_sm", "hub": "spacy"}) h.generate().run().report() def test_ner_hf(self): """ Test NER task with Hugging Face model. """ - h = Harness(task="ner", model="dslim/bert-base-NER", hub="huggingface") + h = Harness( + task="ner", model={"model": "dslim/bert-base-NER", "hub": "huggingface"} + ) h.generate().run().report() def test_ner_jsl(self): """ Test NER task with John Snow Labs model. """ - h = Harness(task="ner", model="ner_dl_bert", hub="johnsnowlabs") + h = Harness(task="ner", model={"model": "ner_dl_bert", "hub": "johnsnowlabs"}) h.generate().run().report() def test_text_classification_spacy(self): """ Test text classification task with Spacy model. """ - h = Harness(task="text-classification", model="textcat_imdb", hub="spacy") + h = Harness( + task="text-classification", model={"model": "textcat_imdb", "hub": "spacy"} + ) h.generate().run().report() def test_text_classification_hf(self): @@ -268,7 +266,8 @@ def test_text_classification_hf(self): Test text classification task with Hugging Face model. """ h = Harness( - task="text-classification", model="lvwerra/distilbert-imdb", hub="huggingface" + task="text-classification", + model={"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, ) h.generate().run().report() @@ -279,8 +278,7 @@ def test_text_classification_jsl(self): try: h = Harness( task="text-classification", - model="en.sentiment.imdb.glove", - hub="johnsnowlabs", + model={"model": "en.sentiment.imdb.glove", "hub": "johnsnowlabs"}, ) h.generate().run().report() except Exception as e: diff --git a/tests/test_mlflow.py b/tests/test_mlflow.py index 38af9dd24..16321c2a1 100644 --- a/tests/test_mlflow.py +++ b/tests/test_mlflow.py @@ -16,10 +16,9 @@ def setUp(self) -> None: """ self.params = { "task": "ner", - "model": "dslim/bert-base-NER", - "data": "tests/fixtures/test.conll", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "huggingface", } def test_mlflow(self): @@ -29,5 +28,5 @@ def test_mlflow(self): harness = Harness(**self.params) harness.data = harness.data[0:5] harness.generate().run().report(mlflow_tracking=True) - experiment_id = mlflow.get_experiment_by_name(self.params["model"]) + experiment_id = mlflow.get_experiment_by_name(self.params["model"]["model"]) self.assertIsNotNone(experiment_id) diff --git a/tests/test_performance.py b/tests/test_performance.py index 23c9ccb57..5c4fe9637 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -9,24 +9,21 @@ def setUp(self) -> None: self.params = { "spacy_ner": { "task": "ner", - "model": "en_core_web_sm", - "data": "tests/fixtures/test.conll", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_performance.yaml", - "hub": "spacy", }, "huggingface_ner": { "task": "ner", - "model": "dslim/bert-base-NER", - "data": "tests/fixtures/test.conll", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_performance.yaml", - "hub": "huggingface", }, "huggingface_textclassification": { "task": "text-classification", - "model": "distilbert-base-uncased", - "data": "tests/fixtures/text_classification.csv", + "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, + "data": {"data_source": "tests/fixtures/text_classification.csv"}, "config": "tests/fixtures/config_performance.yaml", - "hub": "huggingface", }, } diff --git a/tests/test_spacy_model.py b/tests/test_spacy_model.py index 61dc4cb24..6e4ca6b91 100644 --- a/tests/test_spacy_model.py +++ b/tests/test_spacy_model.py @@ -16,10 +16,9 @@ def setUp(self) -> None: """ self.params = { "task": "ner", - "model": "en_core_web_sm", - "data": "langtest/data/conll/sample.conll", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, + "data": {"data_source": "langtest/data/conll/sample.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "spacy", } def test_Harness(self): diff --git a/tests/test_sparknlp_model.py b/tests/test_sparknlp_model.py index 7952a158d..4f989b1d2 100644 --- a/tests/test_sparknlp_model.py +++ b/tests/test_sparknlp_model.py @@ -12,10 +12,9 @@ class SparkNLPTestCase(unittest.TestCase): def setUp(self) -> None: self.params = { "task": "ner", - "model": "ner_dl_bert", - "data": "tests/fixtures/test.conll", + "model": {"model": "ner_dl_bert", "hub": "johnsnowlabs"}, + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", - "hub": "johnsnowlabs", } def test_predict(self): diff --git a/tests/test_translation.py b/tests/test_translation.py index 56e622ace..3ee33e697 100644 --- a/tests/test_translation.py +++ b/tests/test_translation.py @@ -13,9 +13,8 @@ def setUp(self) -> None: """ self.harness = Harness( task="translation", - model="t5-base", - hub="huggingface", - data="Translation-test", + model={"model": "t5-base", "hub": "huggingface"}, + data={"data_source": "Translation-test"}, ) # configure the harness