PacificAI · ArshaanNazir · Sep 4, 2023 · Sep 4, 2023 · Sep 4, 2023 · Sep 4, 2023
diff --git a/docs/pages/docs/data.md b/docs/pages/docs/data.md
@@ -41,7 +41,7 @@ Supported `data_source` formats are task-dependent. The following table provides
 | ----------------------- | -------------------------------------------------------- |
 | **ner**                 | CoNLL, CSV and HuggingFace Datasets                      |
-| **text-classification** | CSV and HuggingFace Datsets                              |
+| **text-classification** | CSV and HuggingFace Datasets                             |
-| **question-answering**  | Select list of benchmark datasets or HuggingFace Datsets |
+| **question-answering**  | Select list of benchmark datasets                        |
-| **summarization**       | Select list of benchmark datasets or HuggingFace Datsets |
+| **summarization**       | Select list of benchmark datasets or HuggingFace Datasets |
 | **toxicity**            | Select list of benchmark datasets                        |
 | **clinical-tests**      | Select list of curated datasets                          |

diff --git a/langtest/transform/__init__.py b/langtest/transform/__init__.py
@@ -371,7 +371,7 @@ def transform(self) -> List[Sample]:
                 A list of `Sample` objects representing the resulting dataset after running the robustness test.
         """
         all_samples = []
-        no_transformation_applied_tests = set()
+        no_transformation_applied_tests = {}
         tests_copy = self.tests.copy()
         for test_name, params in tests_copy.items():
             if TestFactory.is_augment:
@@ -505,14 +505,18 @@ def transform(self) -> List[Sample]:
             new_transformed_samples, removed_samples_tests = filter_unique_samples(
                 TestFactory.task, transformed_samples, test_name
             )
-            no_transformation_applied_tests.update(removed_samples_tests)
             all_samples.extend(new_transformed_samples)
 
+            no_transformation_applied_tests.update(removed_samples_tests)
+
         if no_transformation_applied_tests:
-            logging.warning(
-                "Removing samples where no transformation has been applied in the following tests: "
-                + ", ".join(no_transformation_applied_tests)
+            warning_message = (
+                "Removing samples where no transformation has been applied:\n"
             )
+            for test, count in no_transformation_applied_tests.items():
+                warning_message += f"- Test '{test}': {count} samples removed out of {len(self._data_handler)}\n"
+
+            logging.warning(warning_message)
 
         return all_samples
 
@@ -689,7 +693,7 @@ def transform(self) -> List[Sample]:
                 A list of `Sample` objects representing the resulting dataset after running the bias test.
         """
         all_samples = []
-        no_transformation_applied_tests = set()
+        no_transformation_applied_tests = {}
         for test_name, params in self.tests.items():
             data_handler_copy = [x.copy() for x in self._data_handler]
 
@@ -700,14 +704,18 @@ def transform(self) -> List[Sample]:
             new_transformed_samples, removed_samples_tests = filter_unique_samples(
                 TestFactory.task, transformed_samples, test_name
             )
-            no_transformation_applied_tests.update(removed_samples_tests)
             all_samples.extend(new_transformed_samples)
 
+            no_transformation_applied_tests.update(removed_samples_tests)
+
         if no_transformation_applied_tests:
-            logging.warning(
-                "Removing samples where no transformation has been applied in the following tests: "
-                + ", ".join(no_transformation_applied_tests)
+            warning_message = (
+                "Removing samples where no transformation has been applied:\n"
             )
+            for test, count in no_transformation_applied_tests.items():
+                warning_message += f"- Test '{test}': {count} samples removed out of {len(self._data_handler)}\n"
+
+            logging.warning(warning_message)
 
         return all_samples
 

diff --git a/langtest/transform/utils.py b/langtest/transform/utils.py
@@ -364,10 +364,10 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
 
     Returns:
         new_transformed_samples (list): List of filtered samples with unique transformations.
-        no_transformation_applied_tests (set): Set of test names for which no transformation
-            was applied due to non-uniqueness.
+        no_transformation_applied_tests (dict): A dictionary where keys are test names and
+            values are the number of samples removed from each test.
     """
-    no_transformation_applied_tests = set()
+    no_transformation_applied_tests = {}
     new_transformed_samples = []
     if task == "question-answering":
         for sample in transformed_samples:
@@ -383,9 +383,15 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
                 new_transformed_samples.append(sample)
             else:
                 if test_name == "multiple_perturbations":
-                    no_transformation_applied_tests.add(sample.test_type)
+                    if sample.test_type in no_transformation_applied_tests:
+                        no_transformation_applied_tests[sample.test_type] += 1
+                    else:
+                        no_transformation_applied_tests[sample.test_type] = 1
                 else:
-                    no_transformation_applied_tests.add(test_name)
+                    if test_name in no_transformation_applied_tests:
+                        no_transformation_applied_tests[test_name] += 1
+                    else:
+                        no_transformation_applied_tests[test_name] = 1
     else:
         for sample in transformed_samples:
             if sample.original.replace(" ", "") != sample.test_case.replace(" ", ""):
@@ -394,8 +400,14 @@ def filter_unique_samples(task: str, transformed_samples: list, test_name: str):
                 new_transformed_samples.append(sample)
             else:
                 if test_name == "multiple_perturbations":
-                    no_transformation_applied_tests.add(sample.test_type)
+                    if sample.test_type in no_transformation_applied_tests:
+                        no_transformation_applied_tests[sample.test_type] += 1
+                    else:
+                        no_transformation_applied_tests[sample.test_type] = 1
                 else:
-                    no_transformation_applied_tests.add(test_name)
+                    if test_name in no_transformation_applied_tests:
+                        no_transformation_applied_tests[test_name] += 1
+                    else:
+                        no_transformation_applied_tests[test_name] = 1
 
     return new_transformed_samples, no_transformation_applied_tests