From 6b1d47fa1f8d269a7435d7c1011dd4e31a624239 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 11 Aug 2023 14:03:17 +0530 Subject: [PATCH 01/31] updated model as dict --- langtest/langtest.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 281fb2aa0..1fb1c3126 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -91,8 +91,7 @@ class Harness: def __init__( self, task: str, - model: Optional[Union[str, Any]] = None, - hub: Optional[str] = None, + model: Optional[Union[list, dict]] = None, data: Optional[Union[str, dict]] = None, config: Optional[Union[str, dict]] = None, ): @@ -111,8 +110,26 @@ def __init__( super().__init__() self.is_default = False - self._actual_model = model - self.hub = hub + + if isinstance(model, list): + for item in model: + if not isinstance(item, dict): + raise ValueError("Each item in the list must be a dictionary") + if "model" not in item or "hub" not in item: + raise ValueError( + "Each dictionary in the list must have 'model' and 'hub' keys" + ) + elif isinstance(model, dict): + if "model" not in model or "hub" not in model: + raise ValueError("The dictionary must have 'model' and 'hub' keys") + else: + raise ValueError("Invalid 'model' parameter type") + + if isinstance(model, dict): + hub, model = model["hub"], model["model"] + self._actual_model = model + else: + hub = None if task not in self.SUPPORTED_TASKS: raise ValueError( From 8ed0f498c28b469f1904a6817b343aa7bcd42094 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Fri, 11 Aug 2023 14:05:23 +0530 Subject: [PATCH 02/31] Refacto(langtest.py): Updated Model as a list of dict --- langtest/langtest.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 1fb1c3126..ca4e36836 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -238,13 +238,19 @@ def __init__( 
path=model, task=task, hub=hub, **self._config.get("model_parameters", {}) ) - elif type(model) == dict: + elif isinstance(model, list): model_dict = {} - for k, v in model.items(): - model_dict[k] = ModelFactory.load_model( - task=task, path=k, hub=v, **self._config.get("model_parameters", {}) + for i in model: + model = i["model"] + hub = i["hub"] + + model_dict[model] = ModelFactory.load_model( + path=model, + task=task, + hub=hub, + **self._config.get("model_parameters", {}), ) - self.model = model_dict + self.model = model_dict else: self.model = ModelFactory( @@ -258,7 +264,7 @@ def __init__( print("Test Configuration : \n", formatted_config) global GLOBAL_MODEL - if not isinstance(model, dict): + if not isinstance(model, list): GLOBAL_MODEL = self.model self._testcases = None @@ -307,13 +313,17 @@ def configure(self, config: Union[str, dict]) -> dict: **self._config.get("model_parameters", {}), ) - elif isinstance(model, dict): + elif isinstance(model, list): model_dict = {} - for k, v in model.items(): - model_dict[k] = ModelFactory.load_model( + + for i in model: + model = i["model"] + hub = i["hub"] + + model_dict[model] = ModelFactory.load_model( + path=model, task=task, - path=k, - hub=v, + hub=hub, **self._config.get("model_parameters", {}), ) self.model = model_dict From a412253e14bb96bc6117aaa631b74fd3b65029c8 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Fri, 11 Aug 2023 14:42:53 +0530 Subject: [PATCH 03/31] test(translation): Updated model type --- tests/test_translation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_translation.py b/tests/test_translation.py index 56e622ace..9e4a69b3e 100644 --- a/tests/test_translation.py +++ b/tests/test_translation.py @@ -13,8 +13,7 @@ def setUp(self) -> None: """ self.harness = Harness( task="translation", - model="t5-base", - hub="huggingface", + model={"model": "t5-base", "hub": "huggingface"}, data="Translation-test", ) From e1e0a894b46828f40a29f359d90848909da47cf0 
Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 11 Aug 2023 14:46:13 +0530 Subject: [PATCH 04/31] ner_pipeline.py: updated model type --- langtest/pipelines/transformers/ner_pipeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langtest/pipelines/transformers/ner_pipeline.py b/langtest/pipelines/transformers/ner_pipeline.py index d4c1e7d3b..fb92c7506 100644 --- a/langtest/pipelines/transformers/ner_pipeline.py +++ b/langtest/pipelines/transformers/ner_pipeline.py @@ -153,8 +153,7 @@ def test(self): """Performs the testing procedure of the model on a set of tests using langtest""" self.harness = Harness( task=self.task, - model=self.output_dir, - hub=self.hub, + model={"model": self.output_dir, "hub": self.hub}, data=self.train_data, ) if self.config: From 82dfc8662aa90075e30890c882cf0fd781881e87 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Fri, 11 Aug 2023 14:48:00 +0530 Subject: [PATCH 05/31] test(augmentation): Updated model type --- tests/test_augmentation.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index 4961a7bca..e4ac50755 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -16,45 +16,40 @@ def setUp(self) -> None: self.params = { "spacy_ner": { "task": "ner", - "model": "en_core_web_sm", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_ner.yaml", - "hub": "spacy", }, "huggingface_ner": { "task": "ner", - "model": "dslim/bert-base-NER", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_ner.yaml", - "hub": "huggingface", }, "huggingface_textclassification": { "task": "text-classification", - "model": "distilbert-base-uncased", + "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, "data": "tests/fixtures/test.conll", 
"config": "tests/fixtures/config_ner.yaml", - "hub": "huggingface", }, "huggingface_textclassification_csv_dataset": { "task": "text-classification", - "model": "lvwerra/distilbert-imdb", + "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, "data": "tests/fixtures/text_classification.csv", "config": "tests/fixtures/config_text_classification.yaml", - "hub": "huggingface", }, "spacy_textclassification_hf_dataset": { "task": "text-classification", - "model": "textcat_imdb", + "model": {"model": "textcat_imdb", "hub": "spacy"}, "data": {"name": "imdb"}, "config": "tests/fixtures/config_text_classification.yaml", - "hub": "spacy", + }, "huggingface_textclassification_hf_dataset": { "task": "text-classification", - "model": "lvwerra/distilbert-imdb", + "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, "data": {"name": "imdb"}, "config": "tests/fixtures/config_text_classification.yaml", - "hub": "huggingface", }, } @@ -317,4 +312,4 @@ def test_fix(self): with open("/tmp/augmented_conll.conll", "r") as reader: lines = [line.strip() for line in reader.readlines() if line.strip() != ""] - self.assertListEqual(lines, expected_result) + self.assertListEqual(lines, expected_result) \ No newline at end of file From 33baf230ea3930f42d103d528c5330efd07982ff Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 11 Aug 2023 14:52:17 +0530 Subject: [PATCH 06/31] test(test_harness.py):updated params --- tests/test_harness.py | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index 34bc9c134..d55e4949f 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -18,10 +18,9 @@ def setUpClass(cls) -> None: cls.config_path = "tests/fixtures/config_ner.yaml" cls.harness = Harness( task="ner", - model="dslim/bert-base-NER", + model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, data=cls.data_path, config=cls.config_path, - 
hub="huggingface", ) cls.harness.generate().run() @@ -39,7 +38,7 @@ def test_missing_parameter(self): with self.assertRaises(ValueError) as _: Harness( task="ner", - model="dslim/bert-base-NER", + model={"model": "dslim/bert-base-NER"}, data=self.data_path, config=self.config_path, ) @@ -93,10 +92,9 @@ def test_incompatible_tasks(self): with self.assertRaises(ValueError): Harness( task="text-classifer", - model="dslim/bert-base-NER", + model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, data=self.data_path, config=self.config_path, - hub="huggingface", ) def test_unsupported_test_for_task(self): @@ -106,7 +104,7 @@ def test_unsupported_test_for_task(self): with self.assertRaises(ValueError): h = Harness( task="text-classification", - model="textcat_imdb", + model={"model": "textcat_imdb", "hub": "spacy"}, hub="spacy", config={ "tests": {"robustness": {"swap_entities": {"min_pass_rate": 0.5}}} @@ -146,10 +144,9 @@ def test_load_text_classification(self): save_dir = "/tmp/saved_text_classification_harness_test" tc_harness = Harness( task="text-classification", - model="bert-base-cased", + model={"model": "bert-base-cased", "hub": "huggingface"}, data="tests/fixtures/text_classification.csv", config="tests/fixtures/config_text_classification.yaml", - hub="huggingface", ) tc_harness.generate() tc_harness.save(save_dir) @@ -157,8 +154,7 @@ def test_load_text_classification(self): loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model="bert-base-uncased", - hub="huggingface", + model={"model": "bert-base-cased", "hub": "huggingface"}, ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) self.assertEqual(tc_harness.data, loaded_tc_harness.data) @@ -171,8 +167,7 @@ def test_load_HF_data_text_classification(self): save_dir = "/tmp/saved_HF_data_text_classification_harness_test" tc_harness = Harness( task="text-classification", - hub="huggingface", - model="aychang/roberta-base-imdb", + model={"model": 
"aychang/roberta-base-imdb", "hub": "huggingface"}, data={"name": "imdb"}, ) tc_harness.data = tc_harness.data[:10] @@ -182,8 +177,7 @@ def test_load_HF_data_text_classification(self): loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model="aychang/roberta-base-imdb", - hub="huggingface", + model={"model": "aychang/roberta-base-imdb", "hub": "huggingface"}, ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) self.assertEqual(tc_harness.data, loaded_tc_harness.data) @@ -198,9 +192,8 @@ def test_harness_edit_import_testcases(self): harness = Harness( task="ner", - model="bert-base-cased", + model={"model": "bert-base-cased", "hub": "huggingface"}, data="tests/fixtures/test.conll", - hub="huggingface", ) harness.data = harness.data[:10] harness.generate() @@ -246,21 +239,25 @@ def test_ner_hf(self): """ Test NER task with Hugging Face model. """ - h = Harness(task="ner", model="dslim/bert-base-NER", hub="huggingface") + h = Harness( + task="ner", model={"model": "dslim/bert-base-NER", "hub": "huggingface"} + ) h.generate().run().report() def test_ner_jsl(self): """ Test NER task with John Snow Labs model. """ - h = Harness(task="ner", model="ner_dl_bert", hub="johnsnowlabs") + h = Harness(task="ner", model={"model": "ner_dl_bert", "hub": "johnsnowlabs"}) h.generate().run().report() def test_text_classification_spacy(self): """ Test text classification task with Spacy model. """ - h = Harness(task="text-classification", model="textcat_imdb", hub="spacy") + h = Harness( + task="text-classification", model={"model": "textcat_imdb", "hub": "spacy"} + ) h.generate().run().report() def test_text_classification_hf(self): @@ -268,7 +265,8 @@ def test_text_classification_hf(self): Test text classification task with Hugging Face model. 
""" h = Harness( - task="text-classification", model="lvwerra/distilbert-imdb", hub="huggingface" + task="text-classification", + model={"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, ) h.generate().run().report() @@ -279,8 +277,7 @@ def test_text_classification_jsl(self): try: h = Harness( task="text-classification", - model="en.sentiment.imdb.glove", - hub="johnsnowlabs", + model={"model": "en.sentiment.imdb.glove", "hub": "johnsnowlabs"}, ) h.generate().run().report() except Exception as e: From 7884cc241c46cbc806660c8306985eaebd2d95de Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Fri, 11 Aug 2023 14:56:04 +0530 Subject: [PATCH 07/31] test(performance): Updated model as a dict --- tests/test_augmentation.py | 3 +-- tests/test_performance.py | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index e4ac50755..4192c7e7b 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -43,7 +43,6 @@ def setUp(self) -> None: "model": {"model": "textcat_imdb", "hub": "spacy"}, "data": {"name": "imdb"}, "config": "tests/fixtures/config_text_classification.yaml", - }, "huggingface_textclassification_hf_dataset": { "task": "text-classification", @@ -312,4 +311,4 @@ def test_fix(self): with open("/tmp/augmented_conll.conll", "r") as reader: lines = [line.strip() for line in reader.readlines() if line.strip() != ""] - self.assertListEqual(lines, expected_result) \ No newline at end of file + self.assertListEqual(lines, expected_result) diff --git a/tests/test_performance.py b/tests/test_performance.py index 23c9ccb57..4e55871a3 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -9,24 +9,21 @@ def setUp(self) -> None: self.params = { "spacy_ner": { "task": "ner", - "model": "en_core_web_sm", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_performance.yaml", - 
"hub": "spacy", }, "huggingface_ner": { "task": "ner", - "model": "dslim/bert-base-NER", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_performance.yaml", - "hub": "huggingface", }, "huggingface_textclassification": { "task": "text-classification", - "model": "distilbert-base-uncased", + "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, "data": "tests/fixtures/text_classification.csv", "config": "tests/fixtures/config_performance.yaml", - "hub": "huggingface", }, } From ca8a12fe2a4c79ff06da8f989ec61d3c5f706c9e Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Fri, 11 Aug 2023 14:57:26 +0530 Subject: [PATCH 08/31] test(spacy): Updated model as a dict --- tests/test_spacy_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_spacy_model.py b/tests/test_spacy_model.py index 61dc4cb24..d9a5407c1 100644 --- a/tests/test_spacy_model.py +++ b/tests/test_spacy_model.py @@ -16,10 +16,9 @@ def setUp(self) -> None: """ self.params = { "task": "ner", - "model": "en_core_web_sm", + "model": {"model": "en_core_web_sm", "hub": "spacy"}, "data": "langtest/data/conll/sample.conll", "config": "tests/fixtures/config_ner.yaml", - "hub": "spacy", } def test_Harness(self): From 357e6d409f6bfde7223bc559a7127c29f7edd345 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 11 Aug 2023 15:00:55 +0530 Subject: [PATCH 09/31] test(test_mlflow.py):updated params --- tests/test_mlflow.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_mlflow.py b/tests/test_mlflow.py index 38af9dd24..3eb5a18e7 100644 --- a/tests/test_mlflow.py +++ b/tests/test_mlflow.py @@ -16,10 +16,9 @@ def setUp(self) -> None: """ self.params = { "task": "ner", - "model": "dslim/bert-base-NER", + "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_ner.yaml", - "hub": 
"huggingface", } def test_mlflow(self): From a405d4b8686d520ab595502d21614eee61d5563c Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 11 Aug 2023 15:01:57 +0530 Subject: [PATCH 10/31] test(test_sparknlp_model.py):updated params --- tests/test_sparknlp_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_sparknlp_model.py b/tests/test_sparknlp_model.py index 7952a158d..3bc41730e 100644 --- a/tests/test_sparknlp_model.py +++ b/tests/test_sparknlp_model.py @@ -12,10 +12,9 @@ class SparkNLPTestCase(unittest.TestCase): def setUp(self) -> None: self.params = { "task": "ner", - "model": "ner_dl_bert", + "model": {"model": "ner_dl_bert", "hub": "johnsnowlabs"}, "data": "tests/fixtures/test.conll", "config": "tests/fixtures/config_ner.yaml", - "hub": "johnsnowlabs", } def test_predict(self): From ad5d06bffc3053bb10678b7be027b6760db488d8 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 00:47:27 +0530 Subject: [PATCH 11/31] refacto(datasource) : Added data_source param --- langtest/datahandler/datasource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 5de79e9d2..29ec34141 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -101,7 +101,7 @@ def __init__(self, file_path: str, task: str, **kwargs) -> None: file_path (str): Path to the dataset. task (str): Task to be evaluated. 
""" - self._file_path = file_path + self._file_path = file_path["data_source"] self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls for cls in _IDataset.__subclasses__() @@ -1179,4 +1179,4 @@ def _row_to_ner_sample(self, data_row: dict) -> Sample: original = " ".join(tokens) return NERSample( original=original, expected_results=NEROutput(predictions=ner_labels) - ) + ) \ No newline at end of file From 762359a51a4265528cb0ec0b198ea273c17dd327 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 01:13:04 +0530 Subject: [PATCH 12/31] langtest.py: updated data param as dict --- langtest/langtest.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index ca4e36836..a8df43938 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -92,16 +92,17 @@ def __init__( self, task: str, model: Optional[Union[list, dict]] = None, - data: Optional[Union[str, dict]] = None, + data: Optional[dict] = None, config: Optional[Union[str, dict]] = None, ): """Initialize the Harness object. Args: task (str, optional): Task for which the model is to be evaluated. - model (str | ModelFactory): ModelFactory object or path to the model to be evaluated. - hub (str, optional): model hub to load from the path. Required if path is passed as 'model'. - data (str, optional): Path to the data to be used for evaluation. + model (list | dict, optional): Specifies the model to be evaluated. + If provided as a list, each element should be a dictionary with 'model' and 'hub' keys. + If provided as a dictionary, it must contain 'model' and 'hub' keys when specifying a path. + data (dict, optional): The data to be used for evaluation. config (str | dict, optional): Configuration for the tests to be performed. 
Raises: @@ -150,7 +151,7 @@ def __init__( if data is None and (task, model, hub) in self.DEFAULTS_DATASET: data_path = os.path.join("data", self.DEFAULTS_DATASET[(task, model, hub)]) - data = resource_filename("langtest", data_path) + data = {"data_source":resource_filename("langtest", data_path)} self.data = DataFactory(data, task=self.task).load() if model == "textcat_imdb": model = resource_filename("langtest", "data/textcat_imdb") @@ -159,11 +160,12 @@ def __init__( elif ( isinstance(data, dict) + and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION and task == "text-classification" ): self.data = ( - HuggingFaceDataset(data["name"], task=task).load_data( + HuggingFaceDataset(data["data_source"], task=task).load_data( feature_column=data.get("feature_column", "text"), target_column=data.get("target_column", "label"), split=data.get("split", "test"), @@ -182,10 +184,11 @@ def __init__( elif ( isinstance(data, dict) + and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_NER and task == "ner" ): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( feature_column=data.get("feature_column", "tokens"), target_column=data.get("target_column", "ner_tags"), split=data.get("split", "test"), @@ -194,10 +197,11 @@ def __init__( elif ( isinstance(data, dict) + and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" ): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( feature_column=data.get("feature_column", "document"), target_column=data.get("target_column", "summary"), split=data.get("split", "test"), @@ -210,8 +214,6 @@ def __init__( "passed is not among the default ones. You need to either specify the parameter 'data' " "or use a default configuration." 
) - elif isinstance(data, list): - self.data = data else: self.file_path = data self.data = ( From 34f48485fa7d2ad85792cb3a751255fdab23eb82 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:58:18 +0530 Subject: [PATCH 13/31] tests : updated data param as a dict --- tests/test_augmentation.py | 12 ++++++------ tests/test_harness.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index 4192c7e7b..229847df8 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -17,37 +17,37 @@ def setUp(self) -> None: "spacy_ner": { "task": "ner", "model": {"model": "en_core_web_sm", "hub": "spacy"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", }, "huggingface_ner": { "task": "ner", "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", }, "huggingface_textclassification": { "task": "text-classification", "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", }, "huggingface_textclassification_csv_dataset": { "task": "text-classification", "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, - "data": "tests/fixtures/text_classification.csv", + "data": {"data_source": "tests/fixtures/text_classification.csv"}, "config": "tests/fixtures/config_text_classification.yaml", }, "spacy_textclassification_hf_dataset": { "task": "text-classification", "model": {"model": "textcat_imdb", "hub": "spacy"}, - "data": {"name": "imdb"}, + "data": {"data_source": "imdb"}, "config": "tests/fixtures/config_text_classification.yaml", }, 
"huggingface_textclassification_hf_dataset": { "task": "text-classification", "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, - "data": {"name": "imdb"}, + "data": {"data_source": "imdb"}, "config": "tests/fixtures/config_text_classification.yaml", }, } diff --git a/tests/test_harness.py b/tests/test_harness.py index d55e4949f..8dc46e1a2 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -19,7 +19,7 @@ def setUpClass(cls) -> None: cls.harness = Harness( task="ner", model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, - data=cls.data_path, + data={"data_source": cls.data_path}, config=cls.config_path, ) @@ -39,7 +39,7 @@ def test_missing_parameter(self): Harness( task="ner", model={"model": "dslim/bert-base-NER"}, - data=self.data_path, + data={"data_source": self.data_path}, config=self.config_path, ) @@ -93,7 +93,7 @@ def test_incompatible_tasks(self): Harness( task="text-classifer", model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, - data=self.data_path, + data={"data_source": self.data_path}, config=self.config_path, ) @@ -145,7 +145,7 @@ def test_load_text_classification(self): tc_harness = Harness( task="text-classification", model={"model": "bert-base-cased", "hub": "huggingface"}, - data="tests/fixtures/text_classification.csv", + data={"data_source": "tests/fixtures/text_classification.csv"}, config="tests/fixtures/config_text_classification.yaml", ) tc_harness.generate() @@ -168,7 +168,7 @@ def test_load_HF_data_text_classification(self): tc_harness = Harness( task="text-classification", model={"model": "aychang/roberta-base-imdb", "hub": "huggingface"}, - data={"name": "imdb"}, + data={"data_source": "imdb"}, ) tc_harness.data = tc_harness.data[:10] tc_harness.generate() @@ -193,7 +193,7 @@ def test_harness_edit_import_testcases(self): harness = Harness( task="ner", model={"model": "bert-base-cased", "hub": "huggingface"}, - data="tests/fixtures/test.conll", + data={"data_source": 
"tests/fixtures/test.conll"}, ) harness.data = harness.data[:10] harness.generate() From de0aed489ccffba57d6c11e508aff5b4412f6c11 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:58:54 +0530 Subject: [PATCH 14/31] ner_pipeline : updated data param as a dict --- langtest/pipelines/transformers/ner_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/pipelines/transformers/ner_pipeline.py b/langtest/pipelines/transformers/ner_pipeline.py index fb92c7506..14f8324f5 100644 --- a/langtest/pipelines/transformers/ner_pipeline.py +++ b/langtest/pipelines/transformers/ner_pipeline.py @@ -154,7 +154,7 @@ def test(self): self.harness = Harness( task=self.task, model={"model": self.output_dir, "hub": self.hub}, - data=self.train_data, + data={"data_source": self.train_data}, ) if self.config: self.harness.configure(self.config) From 270cb0601531a18bb833783cf8442272bef422d8 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 16:03:43 +0530 Subject: [PATCH 15/31] Test: updated data as a dict --- tests/test_mlflow.py | 2 +- tests/test_performance.py | 6 +++--- tests/test_spacy_model.py | 2 +- tests/test_sparknlp_model.py | 2 +- tests/test_translation.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_mlflow.py b/tests/test_mlflow.py index 3eb5a18e7..b51a63642 100644 --- a/tests/test_mlflow.py +++ b/tests/test_mlflow.py @@ -17,7 +17,7 @@ def setUp(self) -> None: self.params = { "task": "ner", "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", } diff --git a/tests/test_performance.py b/tests/test_performance.py index 4e55871a3..5c4fe9637 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -10,19 +10,19 @@ def setUp(self) -> None: "spacy_ner": { "task": "ner", "model": {"model": "en_core_web_sm", "hub": 
"spacy"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_performance.yaml", }, "huggingface_ner": { "task": "ner", "model": {"model": "dslim/bert-base-NER", "hub": "huggingface"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_performance.yaml", }, "huggingface_textclassification": { "task": "text-classification", "model": {"model": "distilbert-base-uncased", "hub": "huggingface"}, - "data": "tests/fixtures/text_classification.csv", + "data": {"data_source": "tests/fixtures/text_classification.csv"}, "config": "tests/fixtures/config_performance.yaml", }, } diff --git a/tests/test_spacy_model.py b/tests/test_spacy_model.py index d9a5407c1..6e4ca6b91 100644 --- a/tests/test_spacy_model.py +++ b/tests/test_spacy_model.py @@ -17,7 +17,7 @@ def setUp(self) -> None: self.params = { "task": "ner", "model": {"model": "en_core_web_sm", "hub": "spacy"}, - "data": "langtest/data/conll/sample.conll", + "data": {"data_source": "langtest/data/conll/sample.conll"}, "config": "tests/fixtures/config_ner.yaml", } diff --git a/tests/test_sparknlp_model.py b/tests/test_sparknlp_model.py index 3bc41730e..4f989b1d2 100644 --- a/tests/test_sparknlp_model.py +++ b/tests/test_sparknlp_model.py @@ -13,7 +13,7 @@ def setUp(self) -> None: self.params = { "task": "ner", "model": {"model": "ner_dl_bert", "hub": "johnsnowlabs"}, - "data": "tests/fixtures/test.conll", + "data": {"data_source": "tests/fixtures/test.conll"}, "config": "tests/fixtures/config_ner.yaml", } diff --git a/tests/test_translation.py b/tests/test_translation.py index 9e4a69b3e..3ee33e697 100644 --- a/tests/test_translation.py +++ b/tests/test_translation.py @@ -14,7 +14,7 @@ def setUp(self) -> None: self.harness = Harness( task="translation", model={"model": "t5-base", "hub": "huggingface"}, - data="Translation-test", + data={"data_source": "Translation-test"}, ) # 
configure the harness From f19cf82bd313ccf6c37585e0d0a94b837b544a07 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 13 Aug 2023 23:08:33 +0530 Subject: [PATCH 16/31] test(test_harness.py): updated params --- tests/test_harness.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index 8dc46e1a2..b680c619f 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -226,13 +226,13 @@ def test_non_existing_default(self): Test handling of non-existing default models. """ with self.assertRaises(ValueError): - h = Harness(task="ner", model="xxxxxxxxx", hub="spacy") + h = Harness(task="ner", model={"model": "xxxxxxxxx", "hub": "spacy"}) def test_ner_spacy(self): """ Test NER task with Spacy model. """ - h = Harness(task="ner", model="en_core_web_sm", hub="spacy") + h = Harness(task="ner", model={"model": "en_core_web_sm", "hub": "spacy"}) h.generate().run().report() def test_ner_hf(self): From e727c438317e6d443b14d15a7b3ef0bf467b1841 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Mon, 14 Aug 2023 12:13:47 +0530 Subject: [PATCH 17/31] fix : Lint --- langtest/datahandler/datasource.py | 2 +- langtest/langtest.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 29ec34141..2b4992801 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -1179,4 +1179,4 @@ def _row_to_ner_sample(self, data_row: dict) -> Sample: original = " ".join(tokens) return NERSample( original=original, expected_results=NEROutput(predictions=ner_labels) - ) \ No newline at end of file + ) diff --git a/langtest/langtest.py b/langtest/langtest.py index a8df43938..cc4fce172 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -3,7 +3,7 @@ import os import pickle from collections import defaultdict -from typing import Any, Dict, List, Optional, Union +from typing import Dict, 
List, Optional, Union import pandas as pd import yaml @@ -99,7 +99,7 @@ def __init__( Args: task (str, optional): Task for which the model is to be evaluated. - model (list | dict, optional): Specifies the model to be evaluated. + model (list | dict, optional): Specifies the model to be evaluated. If provided as a list, each element should be a dictionary with 'model' and 'hub' keys. If provided as a dictionary, it must contain 'model' and 'hub' keys when specifying a path. data (dict, optional): The data to be used for evaluation. @@ -151,7 +151,7 @@ def __init__( if data is None and (task, model, hub) in self.DEFAULTS_DATASET: data_path = os.path.join("data", self.DEFAULTS_DATASET[(task, model, hub)]) - data = {"data_source":resource_filename("langtest", data_path)} + data = {"data_source": resource_filename("langtest", data_path)} self.data = DataFactory(data, task=self.task).load() if model == "textcat_imdb": model = resource_filename("langtest", "data/textcat_imdb") From 251643580d20221bb92e6b3e25fa2dccdfef660b Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Mon, 14 Aug 2023 12:17:43 +0530 Subject: [PATCH 18/31] fix: formatting --- tests/test_harness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index b680c619f..7d82af65d 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -226,7 +226,7 @@ def test_non_existing_default(self): Test handling of non-existing default models. 
""" with self.assertRaises(ValueError): - h = Harness(task="ner", model={"model": "xxxxxxxxx", "hub": "spacy"}) + h = Harness(task="ner", model={"model": "xxxxxxxxx", "hub": "spacy"}) def test_ner_spacy(self): """ From 1895b3ee1cbd2f1668b204ba55105b3f02c7cac9 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 07:52:52 +0530 Subject: [PATCH 19/31] updated param --- langtest/langtest.py | 1 + tests/test_harness.py | 6 ++++-- tests/test_mlflow.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index cc4fce172..558029748 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -128,6 +128,7 @@ def __init__( if isinstance(model, dict): hub, model = model["hub"], model["model"] + self.hub = hub self._actual_model = model else: hub = None diff --git a/tests/test_harness.py b/tests/test_harness.py index 7d82af65d..860fa8d90 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -154,7 +154,8 @@ def test_load_text_classification(self): loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model={"model": "bert-base-cased", "hub": "huggingface"}, + model="bert-base-cased", + hub="huggingface", ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) self.assertEqual(tc_harness.data, loaded_tc_harness.data) @@ -177,7 +178,8 @@ def test_load_HF_data_text_classification(self): loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model={"model": "aychang/roberta-base-imdb", "hub": "huggingface"}, + model="aychang/roberta-base-imdb", + hub="huggingface", ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) self.assertEqual(tc_harness.data, loaded_tc_harness.data) diff --git a/tests/test_mlflow.py b/tests/test_mlflow.py index b51a63642..16321c2a1 100644 --- a/tests/test_mlflow.py +++ b/tests/test_mlflow.py @@ -28,5 +28,5 @@ def test_mlflow(self): harness = Harness(**self.params) harness.data = 
harness.data[0:5] harness.generate().run().report(mlflow_tracking=True) - experiment_id = mlflow.get_experiment_by_name(self.params["model"]) + experiment_id = mlflow.get_experiment_by_name(self.params["model"]["model"]) self.assertIsNotNone(experiment_id) From 9450bc6e861494503bcf6c1d61ed4a622408f4c0 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 08:35:29 +0530 Subject: [PATCH 20/31] updated load method of Harness --- langtest/langtest.py | 3 +-- tests/test_harness.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 558029748..4a198ff7e 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -1047,9 +1047,8 @@ def load( harness = Harness( task=task, - model=model, + model={"model": model, "hub": hub}, data=data, - hub=hub, config=os.path.join(save_dir, "config.yaml"), ) harness.generate() diff --git a/tests/test_harness.py b/tests/test_harness.py index 860fa8d90..7fbde65c6 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -105,7 +105,6 @@ def test_unsupported_test_for_task(self): h = Harness( task="text-classification", model={"model": "textcat_imdb", "hub": "spacy"}, - hub="spacy", config={ "tests": {"robustness": {"swap_entities": {"min_pass_rate": 0.5}}} }, From b382322d297d020c3caf7ff3a73b87a2ce3cfa83 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 09:57:23 +0530 Subject: [PATCH 21/31] updated param in langtest.py and augmentation/__init__.py --- langtest/augmentation/__init__.py | 4 ++-- langtest/langtest.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py index 12fc92609..88b983c8c 100644 --- a/langtest/augmentation/__init__.py +++ b/langtest/augmentation/__init__.py @@ -122,7 +122,7 @@ def fix( subset=training_data.get("subset", None), ) else: - self.df = DataFactory(training_data["data_source"], self.task) + self.df = 
DataFactory(training_data, self.task) data = self.df.load() TestFactory.is_augment = True supported_tests = TestFactory.test_scenarios() @@ -345,7 +345,7 @@ def fix( Returns: bool: Returns True upon successful completion of the method. """ - df = DataFactory(training_data["data_source"], self.__task) + df = DataFactory(training_data, self.__task) data = df.load() new_data = [] self.__search_results = self.search_sample_results(data) diff --git a/langtest/langtest.py b/langtest/langtest.py index 4a198ff7e..3fd3d55e6 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -1048,7 +1048,7 @@ def load( harness = Harness( task=task, model={"model": model, "hub": hub}, - data=data, + data={"data_source": data}, config=os.path.join(save_dir, "config.yaml"), ) harness.generate() From 3b9051c853798c2bc3295713588fdbbd22d440df Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Tue, 15 Aug 2023 12:17:23 +0530 Subject: [PATCH 22/31] fix: Datafactory file_path --- langtest/datahandler/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 2b4992801..5de79e9d2 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -101,7 +101,7 @@ def __init__(self, file_path: str, task: str, **kwargs) -> None: file_path (str): Path to the dataset. task (str): Task to be evaluated. 
""" - self._file_path = file_path["data_source"] + self._file_path = file_path self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls for cls in _IDataset.__subclasses__() From 0b64e524e58843bfaf7550eb800dbf8931f0ebbb Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Tue, 15 Aug 2023 12:28:51 +0530 Subject: [PATCH 23/31] fix: DataFactory file_path as a dict object --- langtest/datahandler/datasource.py | 6 +++--- tests/test_harness.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 5de79e9d2..c204eb081 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -94,14 +94,14 @@ class DataFactory: correct Dataset type based on the file extension. """ - def __init__(self, file_path: str, task: str, **kwargs) -> None: + def __init__(self, file_path: dict, task: str, **kwargs) -> None: """Initializes DataFactory object. Args: - file_path (str): Path to the dataset. + file_path (dict): Dictionary containing 'data_source' key with the path to the dataset. task (str): Task to be evaluated. 
""" - self._file_path = file_path + self._file_path = file_path.get("data_source") self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls for cls in _IDataset.__subclasses__() diff --git a/tests/test_harness.py b/tests/test_harness.py index 7fbde65c6..38ab364cb 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -211,7 +211,7 @@ def test_harness_edit_import_testcases(self): df.to_csv(save_dir + "/test_cases.csv", index=False) # import the testcases - harness.import_edited_testcases(save_dir + "/test_cases.csv") + harness.import_edited_testcases({"data_source": save_dir + "/test_cases.csv"}) # test working of the harness harness.run().report() From ea591ecf4f8930e9d88cfd237dad4e57488a3888 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 13:00:14 +0530 Subject: [PATCH 24/31] ner_pipeline.py: param updated --- langtest/langtest.py | 4 ++-- langtest/pipelines/transformers/ner_pipeline.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 3fd3d55e6..9c5771ce6 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -1067,11 +1067,11 @@ def edit_testcases(self, output_path: str, **kwargs): temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])] temp_df.to_csv(output_path, index=False) - def import_edited_testcases(self, input_path: str, **kwargs): + def import_edited_testcases(self, input_path: dict, **kwargs): """Testcases are imported from a csv file Args: - input_path (str): location of the file to load + input_path (dict): location of the file to load """ temp_testcases = [ sample diff --git a/langtest/pipelines/transformers/ner_pipeline.py b/langtest/pipelines/transformers/ner_pipeline.py index 14f8324f5..b249be34b 100644 --- a/langtest/pipelines/transformers/ner_pipeline.py +++ b/langtest/pipelines/transformers/ner_pipeline.py @@ -84,8 +84,12 @@ def setup(self): self.hub = "huggingface" self.output_dir = "checkpoints/" - 
self.train_datasource = DataFactory(file_path=self.train_data, task=self.task) - self.eval_datasource = DataFactory(file_path=self.eval_data, task=self.task) + self.train_datasource = DataFactory( + file_path={"data_source": self.train_data}, task=self.task + ) + self.eval_datasource = DataFactory( + file_path={"data_source": self.eval_data}, task=self.task + ) self.next(self.train) @@ -183,7 +187,7 @@ def augment(self): def retrain(self): """Performs the training procedure using the augmented data created by langtest""" self.augmented_train_datasource = DataFactory( - file_path=self.path_augmented_file, task=self.task + file_path={"data_source": self.path_augmented_file}, task=self.task ) samples = self.augmented_train_datasource.load_raw() From 491ac1700932103d56711b9a1826df68d5d0efb2 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 15:26:58 +0530 Subject: [PATCH 25/31] langtest.py: updated self.data as list --- langtest/langtest.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 9c5771ce6..3bee709d6 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -160,7 +160,7 @@ def __init__( logging.info("Default dataset '%s' successfully loaded.", (task, model, hub)) elif ( - isinstance(data, dict) + isinstance(data["data_source"], str) and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION and task == "text-classification" @@ -184,7 +184,7 @@ def __init__( model = resource_filename("langtest", "data/textcat_imdb") elif ( - isinstance(data, dict) + isinstance(data["data_source"], str) and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_NER and task == "ner" @@ -197,7 +197,7 @@ def __init__( ) elif ( - isinstance(data, dict) + isinstance(data["data_source"], str) and "." 
not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" @@ -215,6 +215,8 @@ def __init__( "passed is not among the default ones. You need to either specify the parameter 'data' " "or use a default configuration." ) + elif isinstance(data["data_source"], list): + self.data = data["data_source"] else: self.file_path = data self.data = ( From 721aba5b270a7c82772b69bdbe6a3728886783ba Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Tue, 15 Aug 2023 16:56:49 +0530 Subject: [PATCH 26/31] langtest.py: updated the condition --- langtest/langtest.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 3bee709d6..275621764 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -160,7 +160,8 @@ def __init__( elif ( - isinstance(data["data_source"], str) + isinstance(data, dict) + and isinstance(data["data_source"], str) and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION and task == "text-classification" @@ -184,7 +185,8 @@ def __init__( model = resource_filename("langtest", "data/textcat_imdb") elif ( - isinstance(data["data_source"], str) + isinstance(data, dict) + and isinstance(data["data_source"], str) and "." not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_NER and task == "ner" @@ -197,7 +199,8 @@ def __init__( ) elif ( - isinstance(data["data_source"], str) + isinstance(data, dict) + and isinstance(data["data_source"], str) and "." 
not in data["data_source"] and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" From 57cd30fe74fe0c0988ae181f7ba51679e9f9593e Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Thu, 17 Aug 2023 10:43:38 +0530 Subject: [PATCH 27/31] fix: Bias datasets are working --- langtest/langtest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 3fd3d55e6..0e71c1040 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -216,7 +216,7 @@ def __init__( "or use a default configuration." ) else: - self.file_path = data + self.file_path = data["data_source"] self.data = ( DataFactory(data, task=self.task).load() if data is not None else None ) From 5125d2a320113e82b74decf4f68e2f08035eb7f1 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 14:43:39 +0530 Subject: [PATCH 28/31] added source param for HuggingFaceDataset --- langtest/augmentation/__init__.py | 3 +- langtest/langtest.py | 83 +++++++++++++++---------------- tests/test_augmentation.py | 4 +- tests/test_harness.py | 4 +- 4 files changed, 45 insertions(+), 49 deletions(-) diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py index 88b983c8c..e2f13f0ad 100644 --- a/langtest/augmentation/__init__.py +++ b/langtest/augmentation/__init__.py @@ -113,7 +113,8 @@ def fix( Returns: List[Dict[str, Any]]: A list of augmented data samples. """ - if "." not in training_data["data_source"]: + + if "source" in data and data["source"] == "huggingface": self.df = HuggingFaceDataset(training_data["data_source"], self.task) data = self.df.load_data( feature_column=training_data.get("feature_column", "text"), diff --git a/langtest/langtest.py b/langtest/langtest.py index 7c4be6ace..c14845661 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -161,56 +161,45 @@ def __init__( elif ( isinstance(data, dict) - and isinstance(data["data_source"], str) - and "." 
not in data["data_source"] - and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION - and task == "text-classification" + and "source" in data + and data["source"] == "huggingface" ): - self.data = ( - HuggingFaceDataset(data["data_source"], task=task).load_data( + if ( + task == "text-classification" + and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION + ): + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( feature_column=data.get("feature_column", "text"), target_column=data.get("target_column", "label"), split=data.get("split", "test"), subset=data.get("subset", None), ) - if data is not None - else None - ) - if hub == "spacy" and (model == "textcat_imdb" or model is None): - if model is None: - logging.warning( - "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." - ) - model = resource_filename("langtest", "data/textcat_imdb") + if hub == "spacy" and (model == "textcat_imdb" or model is None): + if model is None: + logging.warning( + "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." + ) + model = resource_filename("langtest", "data/textcat_imdb") - elif ( - isinstance(data, dict) - and isinstance(data["data_source"], str) - and "." 
not in data["data_source"] - and hub in self.SUPPORTED_HUBS_HF_DATASET_NER - and task == "ner" - ): - self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( - feature_column=data.get("feature_column", "tokens"), - target_column=data.get("target_column", "ner_tags"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) + elif task == "ner" and hub in self.SUPPORTED_HUBS_HF_DATASET_NER: + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( + feature_column=data.get("feature_column", "tokens"), + target_column=data.get("target_column", "ner_tags"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) - elif ( - isinstance(data, dict) - and isinstance(data["data_source"], str) - and "." not in data["data_source"] - and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION - and task == "summarization" - ): - self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( - feature_column=data.get("feature_column", "document"), - target_column=data.get("target_column", "summary"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) + elif ( + task == "summarization" + and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION + ): + self.data = HuggingFaceDataset(data["data_source"], task=task).load_data( + feature_column=data.get("feature_column", "document"), + target_column=data.get("target_column", "summary"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys(): raise ValueError( @@ -221,6 +210,10 @@ def __init__( elif isinstance(data["data_source"], list): self.data = data["data_source"] else: + if "data_source" not in data: + raise ValueError( + "The 'data_source' key must be provided in the 'data' parameter." 
+ ) self.file_path = data["data_source"] self.data = ( DataFactory(data, task=self.task).load() if data is not None else None @@ -1072,11 +1065,11 @@ def edit_testcases(self, output_path: str, **kwargs): temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])] temp_df.to_csv(output_path, index=False) - def import_edited_testcases(self, input_path: dict, **kwargs): + def import_edited_testcases(self, input_path: str, **kwargs): """Testcases are imported from a csv file Args: - input_path (dict): location of the file to load + input_path (str): location of the file to load """ temp_testcases = [ sample @@ -1084,7 +1077,9 @@ def import_edited_testcases(self, input_path: dict, **kwargs): if sample.category not in ["robustness", "bias"] ] - self._testcases = DataFactory(input_path, task=self.task, is_import=True).load() + self._testcases = DataFactory( + {"data_source": input_path}, task=self.task, is_import=True + ).load() self._testcases.extend(temp_testcases) return self diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index 229847df8..5a90ea9f3 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -41,13 +41,13 @@ def setUp(self) -> None: "spacy_textclassification_hf_dataset": { "task": "text-classification", "model": {"model": "textcat_imdb", "hub": "spacy"}, - "data": {"data_source": "imdb"}, + "data": {"data_source": "imdb", "source": "huggingface"}, "config": "tests/fixtures/config_text_classification.yaml", }, "huggingface_textclassification_hf_dataset": { "task": "text-classification", "model": {"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, - "data": {"data_source": "imdb"}, + "data": {"data_source": "imdb", "source": "huggingface"}, "config": "tests/fixtures/config_text_classification.yaml", }, } diff --git a/tests/test_harness.py b/tests/test_harness.py index 38ab364cb..36f43f146 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -168,7 +168,7 @@ def 
test_load_HF_data_text_classification(self): tc_harness = Harness( task="text-classification", model={"model": "aychang/roberta-base-imdb", "hub": "huggingface"}, - data={"data_source": "imdb"}, + data={"data_source": "imdb", "source": "huggingface"}, ) tc_harness.data = tc_harness.data[:10] tc_harness.generate() @@ -211,7 +211,7 @@ def test_harness_edit_import_testcases(self): df.to_csv(save_dir + "/test_cases.csv", index=False) # import the testcases - harness.import_edited_testcases({"data_source": save_dir + "/test_cases.csv"}) + harness.import_edited_testcases(save_dir + "/test_cases.csv") # test working of the harness harness.run().report() From 538063eb4d80f15625e046e7529f4b85a0f2d8a2 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 14:54:02 +0530 Subject: [PATCH 29/31] updated augmentation/__init__.py --- langtest/augmentation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py index e2f13f0ad..e84ffdf71 100644 --- a/langtest/augmentation/__init__.py +++ b/langtest/augmentation/__init__.py @@ -114,7 +114,7 @@ def fix( List[Dict[str, Any]]: A list of augmented data samples. 
""" - if "source" in data and data["source"] == "huggingface": + if "source" in training_data and training_data["source"] == "huggingface": self.df = HuggingFaceDataset(training_data["data_source"], self.task) data = self.df.load_data( feature_column=training_data.get("feature_column", "text"), From 2a9fea9cbb755b5224c8f8a7dcbc5e25baf57b00 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 15:15:08 +0530 Subject: [PATCH 30/31] test(test_augmentation.py):added source param --- tests/test_augmentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py index 5a90ea9f3..caba3df32 100644 --- a/tests/test_augmentation.py +++ b/tests/test_augmentation.py @@ -202,7 +202,7 @@ def test_hf_dataset_textclassification_hf(self): self.assertIsInstance(report, pd.DataFrame) custom_proportions = {"uppercase": 0.8, "lowercase": 0.8} harness.augment( - training_data={"data_source": "imdb"}, + training_data={"data_source": "imdb", "source": "huggingface"}, save_data_path="tests/fixtures/augmented_train_transformed.csv", custom_proportions=custom_proportions, export_mode="transformed", @@ -222,7 +222,7 @@ def test_hf_dataset_textclassification_spacy(self): self.assertIsInstance(report, pd.DataFrame) custom_proportions = {"uppercase": 0.8, "lowercase": 0.8} harness.augment( - training_data={"data_source": "imdb"}, + training_data={"data_source": "imdb", "source": "huggingface"}, save_data_path="tests/fixtures/augmented_train_transformed.csv", custom_proportions=custom_proportions, export_mode="transformed", From 1affd086b8f5e7f7b88a221e9035cff2d579c203 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 15:45:19 +0530 Subject: [PATCH 31/31] datasource.py: added ValueError --- langtest/datahandler/datasource.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index c204eb081..2bf448bcb 100644 --- 
a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -101,6 +101,13 @@ def __init__(self, file_path: dict, task: str, **kwargs) -> None: file_path (dict): Dictionary containing 'data_source' key with the path to the dataset. task (str): Task to be evaluated. """ + if not isinstance(file_path, dict): + raise ValueError("'file_path' must be a dictionary.") + + if "data_source" not in file_path: + raise ValueError( + "The 'data_source' key must be provided in the 'file_path' dictionary." + ) self._file_path = file_path.get("data_source") self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls