From d6156c6c202ad30b1806b440016eee3e95f48a63 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Tue, 8 Aug 2023 18:23:07 +0530
Subject: [PATCH 1/6] test(test_robustness): setup new test class

---
 tests/test_robustness.py | 50 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/tests/test_robustness.py b/tests/test_robustness.py
index 8b40bb0e6..11f91a3dd 100644
--- a/tests/test_robustness.py
+++ b/tests/test_robustness.py
@@ -2,6 +2,8 @@
 from langtest.transform.robustness import *
 from langtest.transform.constants import A2B_DICT
 from langtest.utils.custom_types import SequenceClassificationSample
+from langtest.utils.custom_types.sample import QASample, SummarizationSample
+from langtest.transform import TestFactory
 
 
 class RobustnessTestCase(unittest.TestCase):
@@ -434,3 +436,51 @@ def test_random_age(self) -> None:
         self.assertIsInstance(transformed_samples, list)
         for sample in transformed_samples:
             self.assertNotEqual(sample.test_case, sample.original)
+
+
+class RobustnessTestCaseQaAndSummarization(unittest.TestCase):
+    """
+    A test case class for testing QA and summarization samples on robustness classes.
+    """
+
+    def available_test(self) -> dict:
+        """
+        Get a dictionary of available robustness tests.
+
+        Returns:
+            dict: A dictionary containing available robustness tests.
+        """
+        tests = {
+            j: i
+            for i in BaseRobustness.__subclasses__()
+            for j in (i.alias_name if isinstance(i.alias_name, list) else [i.alias_name])
+        }
+        return tests
+
+    def setUp(self) -> None:
+        """
+        Set up the test environment before each test.
+
+        Returns:
+            None
+        """
+        test_scenarios = TestFactory.test_scenarios()
+        self.available_tests = {
+            test: list(scenarios.keys()) for test, scenarios in test_scenarios.items()
+        }
+
+        self.perturbations_list = self.available_tests["robustness"]
+        self.supported_tests = self.available_test()
+        self.samples = {
+            "question-answering": [
+                QASample(
+                    original_question="What is John Snow Labs?",
+                    original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.",
+                )
+            ],
+            "summarization": [
+                SummarizationSample(
+                    original="John Snow Labs is a healthcare company specializing in accelerating progress in data science.",
+                )
+            ],
+        }

From 4a0971a21f056c5ff6fe6721bdfec68d9b003ae9 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Tue, 8 Aug 2023 18:34:28 +0530
Subject: [PATCH 2/6] test(test_robustness.py): added unittest

---
 tests/test_robustness.py | 53 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/tests/test_robustness.py b/tests/test_robustness.py
index 11f91a3dd..ff4b0e481 100644
--- a/tests/test_robustness.py
+++ b/tests/test_robustness.py
@@ -484,3 +484,56 @@ def setUp(self) -> None:
                 )
             ],
         }
+
+    def test(self) -> None:
+        """
+        Test QA and summarization sample for robustness classes.
+
+        Returns:
+            None
+        """
+        prob = 1.0
+        for test in self.perturbations_list:
+            for s in self.samples:
+                sample = self.samples[s][-1]
+                test_func = self.supported_tests[test].transform
+
+                if test not in [
+                    "swap_entities",
+                    "american_to_british",
+                    "british_to_american",
+                    "add_context",
+                    "multiple_perturbations",
+                ]:
+                    sample.transform(test_func, {}, prob)
+                elif test in ["american_to_british", "british_to_american"]:
+                    sample.transform(test_func, {"accent_map": A2B_DICT}, prob)
+                elif test == "add_context":
+                    sample.transform(
+                        test_func,
+                        {
+                            "ending_context": ["Bye", "Reported"],
+                            "starting_context": ["Hi", "Good morning", "hello"],
+                        },
+                        prob,
+                    )
+                elif test == "multiple_perturbations":
+                    sample.transform(
+                        test_func,
+                        {},
+                        prob,
+                        perturbations=[
+                            "lowercase",
+                            "add_ocr_typo",
+                            "titlecase",
+                            "number_to_word",
+                        ],
+                    )
+
+                if s == "question-answering":
+                    assert (
+                        sample.perturbed_question is not None
+                        and sample.perturbed_context is not None
+                    )
+                else:
+                    assert sample.test_case is not None

From c93d40903679aeb64f078a7c6cf8b60098306ee3 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Tue, 8 Aug 2023 18:43:59 +0530
Subject: [PATCH 3/6] refactor(accuracy.py): Added checks

---
 langtest/transform/accuracy.py | 50 +++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/langtest/transform/accuracy.py b/langtest/transform/accuracy.py
index 3c12d5bbb..133ca238a 100644
--- a/langtest/transform/accuracy.py
+++ b/langtest/transform/accuracy.py
@@ -19,7 +19,7 @@ class BaseAccuracy(ABC):
     alias_name = None
     supported_tasks = ["ner", "text-classification"]
 
-    @staticmethod
+    @classmethod
     @abstractmethod
     def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         """Abstract method that implements the accuracy measure.
@@ -77,10 +77,12 @@ class MinPrecisionScore(BaseAccuracy):
         transform(y_true, y_pred) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_precision_score"
+    alias_name = ["min_precision_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum precision score for the given data.
 
         Args:
@@ -90,6 +92,10 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: Precision test results.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
+
         labels = set(y_true)  # .union(set(y_pred))
 
         if isinstance(params["min_score"], dict):
@@ -149,10 +155,12 @@ class MinRecallScore(BaseAccuracy):
         transform(y_true, y_pred) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_recall_score"
+    alias_name = ["min_recall_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum recall score for the given data.
 
         Args:
@@ -162,6 +170,10 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: minimum recall results.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
+
         labels = set(y_true)  # .union(set(y_pred))
 
         if isinstance(params["min_score"], dict):
@@ -221,10 +233,12 @@ class MinF1Score(BaseAccuracy):
 
     """
 
-    alias_name = "min_f1_score"
+    alias_name = ["min_f1_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum F1 score for the given data.
 
         Args:
@@ -234,6 +248,10 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: F1 score test results.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
+
         labels = set(y_true)
 
         if isinstance(params["min_score"], dict):
@@ -292,10 +310,12 @@ class MinMicroF1Score(BaseAccuracy):
         alias_name (str): The name for config.
     """
 
-    alias_name = "min_micro_f1_score"
+    alias_name = ["min_micro_f1_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum micro F1 score for the given data.
 
         Args:
@@ -305,6 +325,10 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum micro F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
+
         min_score = params["min_score"]
 
         sample = MinScoreSample(

From 38f8251276a4730b00044835d20316bdab5663f4 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Tue, 8 Aug 2023 18:49:38 +0530
Subject: [PATCH 4/6] refactor(accuracy.py): added checks to remaining classes

---
 langtest/transform/__init__.py |  4 +--
 langtest/transform/accuracy.py | 55 ++++++++++++++++++++++++----------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/langtest/transform/__init__.py b/langtest/transform/__init__.py
index c41a576c2..309d92617 100644
--- a/langtest/transform/__init__.py
+++ b/langtest/transform/__init__.py
@@ -1068,9 +1068,9 @@ def transform(self) -> List[Sample]:
                 )
 
             y_true = y_true.dropna()
-            params["test_name"] = test_name
+
             transformed_samples = self.supported_tests[test_name].transform(
-                y_true, params
+                test_name, y_true, params
             )
 
             for sample in transformed_samples:
diff --git a/langtest/transform/accuracy.py b/langtest/transform/accuracy.py
index 133ca238a..4734f2a4b 100644
--- a/langtest/transform/accuracy.py
+++ b/langtest/transform/accuracy.py
@@ -375,10 +375,12 @@ class MinMacroF1Score(BaseAccuracy):
         transform(y_true, params) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_macro_f1_score"
+    alias_name = ["min_macro_f1_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum macro F1 score for the given data.
 
         Args:
@@ -388,6 +390,9 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum macro F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         min_score = params["min_score"]
 
         sample = MinScoreSample(
@@ -433,10 +438,12 @@ class MinWeightedF1Score(BaseAccuracy):
         transform(y_true, params) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_weighted_f1_score"
+    alias_name = ["min_weighted_f1_score"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum weighted F1 score for the given data.
 
         Args:
@@ -446,6 +453,9 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         min_score = params["min_score"]
 
         sample = MinScoreSample(
@@ -490,11 +500,13 @@ class MinEMcore(BaseAccuracy):
         transform(y_true, y_pred) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_exact_match_score"
+    alias_name = ["min_exact_match_score"]
     supported_tasks = ["question-answering", "summarization"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum F1 score for the given data.
 
         Args:
@@ -504,6 +516,9 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         min_score = params["min_score"]
 
         sample = MinScoreSample(
@@ -552,11 +567,13 @@ class MinBLEUcore(BaseAccuracy):
         transform(y_true, y_pred) -> Any: Creates accuracy test results.
     """
 
-    alias_name = "min_bleu_score"
+    alias_name = ["min_bleu_score"]
     supported_tasks = ["question-answering", "summarization"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum F1 score for the given data.
 
         Args:
@@ -566,6 +583,9 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         min_score = params["min_score"]
 
         sample = MinScoreSample(
@@ -622,8 +642,10 @@ class MinROUGEcore(BaseAccuracy):
     ]
     supported_tasks = ["question-answering", "summarization"]
 
-    @staticmethod
-    def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, y_true: List[Any], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum F1 score for the given data.
 
         Args:
@@ -634,11 +656,14 @@ def transform(y_true: List[Any], params: Dict) -> List[MinScoreSample]:
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         min_score = params["min_score"]
 
         sample = MinScoreSample(
             category="accuracy",
-            test_type=params["test_name"],
+            test_type=test,
             expected_results=MinScoreOutput(min_score=min_score),
         )
 

From 81e589fe8b464d13bbc33f280dabd5c4072670e4 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Tue, 8 Aug 2023 18:54:28 +0530
Subject: [PATCH 5/6] pytest: test_accuracy.py

---
 tests/test_accuracy.py | 129 +++++++++++++++++++++++++++--------------
 1 file changed, 87 insertions(+), 42 deletions(-)

diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py
index 1b2d19c8b..ccb855ae3 100644
--- a/tests/test_accuracy.py
+++ b/tests/test_accuracy.py
@@ -1,51 +1,96 @@
-import unittest
+import pytest
 import pandas as pd
-from langtest import Harness
+from langtest.utils.custom_types import SequenceLabel, Span
+from langtest.utils.custom_types.output import (
+    NEROutput,
+    NERPrediction,
+    SequenceClassificationOutput,
+)
+from langtest.utils.custom_types.sample import (
+    NERSample,
+    QASample,
+    SequenceClassificationSample,
+    SummarizationSample,
+)
 
 
-class AccuracyTestCase(unittest.TestCase):
+class TestAccuracy:
     """
-    A test case for accuracy evaluation.
-
-    This test case performs accuracy evaluation using the `Harness` class from `langtest` package.
-    It tests the accuracy of a named entity recognition (NER) model using a sample dataset in CoNLL format.
-
-    Attributes:
-        h_spacy (Harness): The `Harness` instance for evaluating the model.
-        report (pd.DataFrame): The evaluation report generated by the `Harness`.
+    A test suite for evaluating accuracy classes.
     """
 
-    def setUp(self) -> None:
-        """
-        Set up the test case.
-
-        This method initializes the `Harness` instance with the necessary configuration for evaluating the model.
-        It configures the test settings, such as the minimum pass rate and minimum F1 score, for accuracy evaluation.
-        """
-
-        self.h_spacy = Harness(
-            task="ner",
-            model="en_core_web_sm",
-            data="langtest/data/conll/sample.conll",
-            hub="spacy",
-        )
-        self.h_spacy.configure(
-            {
-                "tests": {
-                    "defaults": {
-                        "min_pass_rate": 0.65,
-                    },
-                    "accuracy": {"min_f1_score": {"min_score": 0.65}},
-                }
-            }
-        )
-        self.report = self.h_spacy.generate().run().report()
+    accuracy_config = {
+        "min_precision_score": {"min_score": 0.66},
+        "min_recall_score": {"min_score": 0.60},
+        "min_f1_score": {"min_score": 0.60},
+        "min_micro_f1_score": {"min_score": 0.60},
+        "min_macro_f1_score": {"min_score": 0.60},
+        "min_weighted_f1_score": {"min_score": 0.60},
+        "min_bleu_score": {"min_score": 0.66},
+        "min_exact_match_score": {"min_score": 0.60},
+        "min_rouge1_score": {"min_score": 0.60},
+        "min_rouge2_score": {"min_score": 0.60},
+        "min_rougeL_score": {"min_score": 0.60},
+        "min_rougeLsum_score": {"min_score": 0.60},
+    }
 
-    def test_report(self):
-        """
-        Test the evaluation report.
+    @pytest.fixture
+    def sample_data(self):
+        """A fixture providing sample data for testing.
 
-        This method asserts that the evaluation report generated by the `Harness` is an instance of `pd.DataFrame`.
+        Returns:
+            dict: A dictionary containing sample data for different tasks.
         """
-
-        self.assertIsInstance(self.report, pd.DataFrame)
+        return {
+            "text-classification": [
+                SequenceClassificationSample(
+                    original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.",
+                    expected_results=SequenceClassificationOutput(
+                        predictions=[SequenceLabel(label="Positive", score=1.0)]
+                    ),
+                ),
+                SequenceClassificationSample(
+                    original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.",
+                    expected_results=SequenceClassificationOutput(
+                        predictions=[SequenceLabel(label="Positive", score=1.0)]
+                    ),
+                ),
+            ],
+            "ner": [
+                NERSample(
+                    original="Attendance : 3,000",
+                    expected_results=NEROutput(
+                        predictions=[
+                            NERPrediction(
+                                entity="CARDINAL",
+                                span=Span(start=13, end=18, word="3,000"),
+                            )
+                        ]
+                    ),
+                ),
+                NERSample(
+                    original="I do not love KFC",
+                    expected_results=NEROutput(
+                        predictions=[
+                            NERPrediction(
+                                entity="PROD", span=Span(start=14, end=17, word="KFC")
+                            )
+                        ]
+                    ),
+                ),
+            ],
+            "question-answering": [
+                QASample(
+                    original_question="What is John Snow Labs?",
+                    original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.",
+                    expected_results="A healthcare company specializing in accelerating progress in data science. ",
+                )
+            ],
+            "summarization": [
+                SummarizationSample(
+                    original="John Snow Labs is a healthcare company specializing in accelerating progress in data "
+                    "science.",
+                    expected_results="JSL is a data science company",
+                )
+            ],
+        }

From 2ef2e7a7d6e51dcb525ede9c03ca96bf0de9c1b5 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Tue, 8 Aug 2023 19:04:28 +0530
Subject: [PATCH 6/6] test(test_accuracy.py): pytest for accuracy

---
 tests/test_accuracy.py   | 67 ++++++++++++++++++++++++++++++++++++++++
 tests/test_robustness.py |  6 ++--
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py
index ccb855ae3..db1c2cc0a 100644
--- a/tests/test_accuracy.py
+++ b/tests/test_accuracy.py
@@ -1,5 +1,16 @@
 import pytest
 import pandas as pd
+from langtest.transform.accuracy import (
+    BaseAccuracy,
+    MinPrecisionScore,
+    MinF1Score,
+    MinMicroF1Score,
+    MinMacroF1Score,
+    MinWeightedF1Score,
+    MinEMcore,
+    MinBLEUcore,
+    MinROUGEcore,
+)
 from langtest.utils.custom_types import SequenceLabel, Span
 from langtest.utils.custom_types.output import (
     NEROutput,
@@ -7,6 +18,7 @@
     SequenceClassificationOutput,
 )
 from langtest.utils.custom_types.sample import (
+    MinScoreSample,
     NERSample,
     QASample,
     SequenceClassificationSample,
@@ -94,3 +106,58 @@ def sample_data(self):
                 )
             ],
         }
+
+    @pytest.mark.parametrize(
+        "accuracy",
+        [
+            MinPrecisionScore,
+            MinF1Score,
+            MinMicroF1Score,
+            MinMacroF1Score,
+            MinWeightedF1Score,
+            MinEMcore,
+            MinBLEUcore,
+            MinROUGEcore,
+        ],
+    )
+    def test_transform(self, accuracy: BaseAccuracy, sample_data) -> None:
+        """Test the transform method of accuracy-related classes.
+
+        Args:
+            accuracy (BaseAccuracy): An accuracy-related class to test.
+            sample_data (dict): Sample data for different tasks.
+
+        Returns:
+            None
+        """
+        for alias in accuracy.alias_name:
+            for task in accuracy.supported_tasks:
+                if task == "text-classification":
+                    y_true = (
+                        pd.Series(sample_data["text-classification"])
+                        .apply(
+                            lambda x: [y.label for y in x.expected_results.predictions]
+                        )
+                        .explode()
+                    )
+                elif task == "ner":
+                    y_true = pd.Series(sample_data["ner"]).apply(
+                        lambda x: [y.entity for y in x.expected_results.predictions]
+                    )
+                    y_true = y_true.explode().apply(
+                        lambda x: x.split("-")[-1] if isinstance(x, str) else x
+                    )
+
+                else:
+                    y_true = (
+                        pd.Series(sample_data[task])
+                        .apply(lambda x: x.expected_results)
+                        .explode()
+                    )
+                transform_results = accuracy.transform(
+                    alias, y_true, self.accuracy_config[alias]
+                )
+                assert isinstance(transform_results, list)
+
+                for _, result in zip(y_true, transform_results):
+                    assert isinstance(result, MinScoreSample)
diff --git a/tests/test_robustness.py b/tests/test_robustness.py
index ff4b0e481..547954323 100644
--- a/tests/test_robustness.py
+++ b/tests/test_robustness.py
@@ -494,8 +494,8 @@ def test(self) -> None:
         """
         prob = 1.0
         for test in self.perturbations_list:
-            for s in self.samples:
-                sample = self.samples[s][-1]
+            for task in self.samples:
+                sample = self.samples[task][-1]
                 test_func = self.supported_tests[test].transform
 
                 if test not in [
@@ -530,7 +530,7 @@ def test(self) -> None:
                         ],
                     )
 
-                if s == "question-answering":
+                if task == "question-answering":
                     assert (
                         sample.perturbed_question is not None
                         and sample.perturbed_context is not None