From 1a149af1935b7ba0997d6cdd3f1f4f3c9433b03f Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Mon, 4 Sep 2023 16:26:38 +0530
Subject: [PATCH 1/3] transform/utils.py: filter_unique_samples returns per-test counts of removed samples

---
 langtest/transform/utils.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/langtest/transform/utils.py b/langtest/transform/utils.py
index 7d127b00c..65384fb5c 100644
--- a/langtest/transform/utils.py
+++ b/langtest/transform/utils.py
@@ -364,10 +364,10 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
 
     Returns:
         new_transformed_samples (list): List of filtered samples with unique transformations.
-        no_transformation_applied_tests (set): Set of test names for which no transformation
-            was applied due to non-uniqueness.
+        no_transformation_applied_tests (dict): A dictionary where keys are test names and
+            values are the number of samples removed from each test.
     """
-    no_transformation_applied_tests = set()
+    no_transformation_applied_tests = {}
     new_transformed_samples = []
     if task == "question-answering":
         for sample in transformed_samples:
@@ -383,9 +383,15 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
                 new_transformed_samples.append(sample)
             else:
                 if test_name == "multiple_perturbations":
-                    no_transformation_applied_tests.add(sample.test_type)
+                    if sample.test_type in no_transformation_applied_tests:
+                        no_transformation_applied_tests[sample.test_type] += 1
+                    else:
+                        no_transformation_applied_tests[sample.test_type] = 1
                 else:
-                    no_transformation_applied_tests.add(test_name)
+                    if test_name in no_transformation_applied_tests:
+                        no_transformation_applied_tests[test_name] += 1
+                    else:
+                        no_transformation_applied_tests[test_name] = 1
     else:
         for sample in transformed_samples:
             if sample.original.replace(" ", "") != sample.test_case.replace(" ", ""):
@@ -394,8 +400,14 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
                 new_transformed_samples.append(sample)
             else:
                 if test_name == "multiple_perturbations":
-                    no_transformation_applied_tests.add(sample.test_type)
+                    if sample.test_type in no_transformation_applied_tests:
+                        no_transformation_applied_tests[sample.test_type] += 1
+                    else:
+                        no_transformation_applied_tests[sample.test_type] = 1
                 else:
-                    no_transformation_applied_tests.add(test_name)
+                    if test_name in no_transformation_applied_tests:
+                        no_transformation_applied_tests[test_name] += 1
+                    else:
+                        no_transformation_applied_tests[test_name] = 1
 
     return new_transformed_samples, no_transformation_applied_tests

From 1fb52d47277e194c66dc9f2a0d0b07f4507eb00b Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Mon, 4 Sep 2023 16:28:09 +0530
Subject: [PATCH 2/3] transform/__init__.py: report per-test removed-sample counts in warning message

---
 langtest/transform/__init__.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/langtest/transform/__init__.py b/langtest/transform/__init__.py
index f08234476..d48b9d28b 100644
--- a/langtest/transform/__init__.py
+++ b/langtest/transform/__init__.py
@@ -371,7 +371,7 @@ def transform(self) -> List[Sample]:
                 A list of `Sample` objects representing the resulting dataset after running the robustness test.
         """
         all_samples = []
-        no_transformation_applied_tests = set()
+        no_transformation_applied_tests = {}
         tests_copy = self.tests.copy()
         for test_name, params in tests_copy.items():
             if TestFactory.is_augment:
@@ -505,14 +505,18 @@ def transform(self) -> List[Sample]:
             new_transformed_samples, removed_samples_tests = filter_unique_samples(
                 TestFactory.task, transformed_samples, test_name
             )
-            no_transformation_applied_tests.update(removed_samples_tests)
             all_samples.extend(new_transformed_samples)
 
+            no_transformation_applied_tests.update(removed_samples_tests)
+
         if no_transformation_applied_tests:
-            logging.warning(
-                "Removing samples where no transformation has been applied in the following tests: "
-                + ", ".join(no_transformation_applied_tests)
+            warning_message = (
+                "Removing samples where no transformation has been applied:\n"
             )
+            for test, count in no_transformation_applied_tests.items():
+                warning_message += f"- Test '{test}': {count} samples removed out of {len(self._data_handler)}\n"
+
+            logging.warning(warning_message)
 
         return all_samples
 
@@ -689,7 +693,7 @@ def transform(self) -> List[Sample]:
                 A list of `Sample` objects representing the resulting dataset after running the bias test.
         """
         all_samples = []
-        no_transformation_applied_tests = set()
+        no_transformation_applied_tests = {}
         for test_name, params in self.tests.items():
             data_handler_copy = [x.copy() for x in self._data_handler]
 
@@ -700,14 +704,18 @@ def transform(self) -> List[Sample]:
             new_transformed_samples, removed_samples_tests = filter_unique_samples(
                 TestFactory.task, transformed_samples, test_name
             )
-            no_transformation_applied_tests.update(removed_samples_tests)
             all_samples.extend(new_transformed_samples)
 
+            no_transformation_applied_tests.update(removed_samples_tests)
+
         if no_transformation_applied_tests:
-            logging.warning(
-                "Removing samples where no transformation has been applied in the following tests: "
-                + ", ".join(no_transformation_applied_tests)
+            warning_message = (
+                "Removing samples where no transformation has been applied:\n"
             )
+            for test, count in no_transformation_applied_tests.items():
+                warning_message += f"- Test '{test}': {count} samples removed out of {len(self._data_handler)}\n"
+
+            logging.warning(warning_message)
 
         return all_samples
 

From d48941339b6bb3ad8457e33d405c584117f378d6 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Mon, 4 Sep 2023 17:31:59 +0530
Subject: [PATCH 3/3] website(data.md): drop HuggingFace Datasets from question-answering data sources

---
 docs/pages/docs/data.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/pages/docs/data.md b/docs/pages/docs/data.md
index b8bc06119..0c7ce75f2 100644
--- a/docs/pages/docs/data.md
+++ b/docs/pages/docs/data.md
@@ -41,7 +41,7 @@ Supported `data_source` formats are task-dependent. The following table provides
 | - | - | 
 |**ner**     |CoNLL, CSV and HuggingFace Datasets|
 |**text-classification**     |CSV and HuggingFace Datsets
-|**question-answering**     |Select list of benchmark datasets or HuggingFace Datsets
+|**question-answering**     |Select list of benchmark datasets
 |**summarization**     |Select list of benchmark datasets or HuggingFace Datsets
 |**toxicity**     |Select list of benchmark datasets
 |**clinical-tests**     |Select list of curated datasets