From 0d1a4b53769343041892b868f0e38e963bde6596 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:06:50 +0530 Subject: [PATCH 1/5] refacto: hardcode task in Samples --- langtest/datahandler/datasource.py | 9 +- langtest/utils/custom_types/sample.py | 189 ++++++++++---------------- 2 files changed, 74 insertions(+), 124 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 5de79e9d2..51c8e183e 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -171,7 +171,6 @@ def load_curated_bias( original_context=item.get("original_context", "-"), perturbed_question=item["perturbed_question"], perturbed_context=item.get("perturbed_context", "-"), - task="question-answering", test_type=item["test_type"], category=item["category"], dataset_name="BoolQ", @@ -186,7 +185,6 @@ def load_curated_bias( SummarizationSample( original=item["original"], test_case=item["test_case"], - task="summarization", test_type=item["test_type"], category=item["category"], dataset_name="XSum", @@ -805,7 +803,6 @@ def load_data(self) -> List[Sample]: self.column_matcher["context"], "-" ), expected_results=expected_results, - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -820,7 +817,6 @@ def load_data(self) -> List[Sample]: SummarizationSample( original=item[self.column_matcher["text"]], expected_results=expected_results, - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -828,7 +824,6 @@ def load_data(self) -> List[Sample]: data.append( ToxicitySample( prompt=item[self.column_matcher["text"]], - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -837,7 +832,6 @@ def load_data(self) -> List[Sample]: data.append( TranslationSample( original=item[self.column_matcher["text"]], - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -1076,8 +1070,7 @@ def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: summary = data_row.get("summary", "") return SummarizationSample( - original=original, expected_results=summary, task="summarization" - ) + original=original, expected_results=summary) def export_data(self, data: List[Sample], output_path: str): """Exports the data to the corresponding format and saves it to 'output_path'. diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index 74a8b0895..f0b1f7a89 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -1,6 +1,6 @@ -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union +from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union, Callable from copy import deepcopy -from pydantic import BaseModel, PrivateAttr, validator +from pydantic import BaseModel, PrivateAttr, validator, Field from .helpers import Transformation, Span from .helpers import default_user_prompt from ..util_metrics import cosine_similarity @@ -9,8 +9,7 @@ class BaseSample(BaseModel): - """ - Helper object storing the original text, the perturbed one and the corresponding + """Helper object storing the original text, the perturbed one and the corresponding predictions for each of them. 
The specificity here is that it is task-agnostic, one only needs to call access the `is_pass` @@ -31,6 +30,7 @@ class BaseSample(BaseModel): state: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: @@ -70,15 +70,12 @@ def to_dict(self) -> Dict[str, Any]: @validator("transformations") def sort_transformations(cls, v): - """ - Validator ensuring that transformations are in correct order - """ + """Validator ensuring that transformations are in correct order""" return sorted(v, key=lambda x: x.original_span.start) @property def relevant_transformations(self) -> Optional[List[Transformation]]: - """ - Retrieves the transformations that need to be taken into account to realign `original` and `test_case`. + """Retrieves the transformations that need to be taken into account to realign `original` and `test_case`. Returns: Optional[List[Transformation]]: list of transformations which shouldn't be ignored @@ -93,8 +90,8 @@ def relevant_transformations(self) -> Optional[List[Transformation]]: @property def irrelevant_transformations(self) -> Optional[List[Transformation]]: - """ - Retrieves the transformations that do not need to be taken into account to realign `original` and `test_case`. + """Retrieves the transformations that do not need to be taken into + account to realign `original` and `test_case`. Returns: Optional[List[Transformation]]: list of transformations which should be ignored @@ -108,25 +105,25 @@ def irrelevant_transformations(self) -> Optional[List[Transformation]]: ] def is_pass(self) -> bool: - """""" + """Checks if the sample passes based on the maximum score.""" raise NotImplementedError() class NERSample(BaseSample): - """""" + """Helper object for named entity recognition tasks""" # TODO: remove _realigned_spans, but for now it ensures that we don't realign spans multiple times - task: str = "ner" + task: str = Field(default="ner", const=True) _realigned_spans: Optional[Result] = PrivateAttr(default_factory=None) def __init__(self, **data): + """Constructor method""" super().__init__(**data) self._realigned_spans = None @property def ignored_predictions(self) -> List[NERPrediction]: - """ - List of predictions that should be ignored because of the perturbations applied + """List of predictions that should be ignored because of the perturbations applied Returns: List[NERPrediction]: list of predictions which should be ignored @@ -146,9 +143,7 @@ def ignored_predictions(self) -> List[NERPrediction]: @property def realigned_spans(self) -> NEROutput: - """ - This function is in charge of shifting the `actual_results` spans according to the perturbations - that were applied to the text. + """Shifting the `actual_results` spans according to the perturbations that were applied to the text. Note: we ignore predicted spans that were added during a perturbation @@ -156,7 +151,6 @@ def realigned_spans(self) -> NEROutput: NEROutput: realigned NER predictions """ - if self._realigned_spans is None: if len(self.transformations or "") == 0: return self.actual_results @@ -214,17 +208,15 @@ def realigned_spans(self) -> NEROutput: return self._realigned_spans def _retrieve_multi_spans(self, span: Span) -> List[Span]: - """ - Function in charge to perform realignment when a single 'Span' became multiple - ones. + """Function in charge to perform realignment when a single 'Span' became multipleones. 
 
         Args:
             span (Span): the original span
+
         Returns:
             List[Span]: the list of spans that correspond to the perturbed original one
-
         """
         for start_index in range(len(self.expected_results)):
             if span.start == self.expected_results[start_index].span.start:
                 break
         return [
@@ -236,7 +228,8 @@ def _retrieve_multi_spans(self, span: Span) -> List[Span]:
     def get_aligned_span_pairs(
         self,
     ) -> List[Tuple[Optional[NERPrediction], Optional[NERPrediction]]]:
-        """
+        """Realigns the original text with the perturbed one using the Transformations
+
         Returns:
             List[Tuple[Optional[NERPrediction], Optional[NERPrediction]]]: List of
                 aligned predicted spans from the original sentence to the perturbed one. The
@@ -290,7 +283,7 @@ def get_aligned_span_pairs(
             return aligned_results
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes by comparing expected and realigned predicted entities."""
        return all(
            [a == b for (a, b) in self.get_aligned_span_pairs() if a and a.entity != "O"]
        )
@@ -309,13 +302,14 @@ class SequenceClassificationSample(BaseSample):
     """
 
-    task: str = "text-classification"
+    task: str = Field(default="text-classification", const=True)
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes by comparing expected and actual results."""
         return self.expected_results == self.actual_results
 
 
@@ -328,21 +322,21 @@ class MinScoreSample(BaseSample):
     Methods:
         is_pass: Checks if the sample passes based on the minimum score.
-
     """
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes based on the minimum score."""
         if self.actual_results is None:
             return False
         return self.actual_results.min_score >= self.expected_results.min_score
 
 
 class MaxScoreSample(BaseSample):
-    """ "A class representing a maximum score.
+    """Helper object representing a maximum score.
 
     Attributes:
         actual_results (Results): The actual results object containing the score information.
@@ -353,27 +347,18 @@ class MaxScoreSample(BaseSample):
     """
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes based on the maximum score."""
         if self.actual_results is None:
             return False
         return self.actual_results.max_score <= self.expected_results.max_score
 
 
 class BaseQASample(BaseModel):
-    """
-    Helper object storing the original text, the perturbed one and the corresponding
-    predictions for each of them.
-
-    The specificity here is that it is task-agnostic, one only needs to call access the `is_pass`
-    property to assess whether the `expected_results` and the `actual_results` are the same, regardless
-    the downstream task.langtest/utils/custom_types.py
-
-    This way, to support a new task one only needs to create a `XXXOutput` model, overload the `__eq__`
-    operator and add the new model to the `Result` type variable.
-    """
+    """Helper object to extend for question-answering tasks"""
 
     original_question: str
     original_context: str
@@ -385,15 +370,17 @@ class BaseQASample(BaseModel):
     dataset_name: str = None
     category: str = None
     state: str = None
-    task: str = None
+    task: str = Field(default="question-answering", const=True)
     test_case: str = None
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
-    def transform(self, func, params, prob, perturbations=None, **kwargs):
-        """
-        Transforms the original question and context using the specified function.
+ def transform( + self, func: Callable, params: Dict, prob: float, perturbations=None, **kwargs + ): + """Transforms the original question and context using the specified function. Args: func (function): The transformation function to apply. @@ -404,7 +391,6 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): Returns: None """ - if perturbations is None: sens = [self.original_question, self.original_context] self.perturbed_question, self.perturbed_context = func( @@ -421,7 +407,8 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): self.category = func.__module__.split(".")[-1] def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" + tokens = 1 dataset_name = self.dataset_name.split("-")[0].lower() prompt_template = kwargs.get( @@ -454,19 +441,18 @@ def run(self, model, **kwargs): class QASample(BaseQASample): - """ - A class representing a sample for question answering task. + """A class representing a sample for question answering task. Attributes: Inherits attributes from BaseQASample class. """ def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Returns the dictionary version of the sample. + """Returns the dictionary version of the sample. Returns: Dict[str, Any]: The dictionary representation of the sample. @@ -495,8 +481,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. + """Checks if the sample has passed the evaluation. Returns: bool: True if the sample passed the evaluation, False otherwise. @@ -541,38 +526,31 @@ def is_pass(self) -> bool: class MinScoreQASample(QASample): - """ - A class representing a sample for question answering task with minimum score comparison. - """ + """A class representing a sample for question answering task with minimum score comparison.""" def __init__(self, **data): + """Constructor method""" super().__init__(**data) def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self.actual_results.min_score >= self.expected_results.min_score class MaxScoreQASample(QASample): - """ - A class representing a sample for question answering task with maximum score comparison. - """ + """A class representing a sample for question answering task with maximum score comparison.""" def __init__(self, **data): + """Constructor method""" super().__init__(**data) def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self.actual_results.max_score <= self.expected_results.max_score class SummarizationSample(BaseModel): - """ - A class representing a sample for summarization task. + """A class representing a sample for summarization task. Attributes: original (str): The original text. @@ -592,17 +570,16 @@ class SummarizationSample(BaseModel): actual_results: str = None state: str = None dataset_name: str = None - task: str = None + task: str = Field(default="summarization", constr=True) category: str = None test_type: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Returns the dict version of sample. 
- """ + """Returns the dict version of sample.""" result = { "category": self.category, "test_type": self.test_type, @@ -624,9 +601,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self._is_eval()[0] def _is_eval(self): @@ -654,8 +629,7 @@ def _is_eval(self): return results["f1"] >= config.get("threshold", 0.50), results["f1"] def transform(self, func, params, prob, perturbations=None, **kwargs): - """ - Transforms the original data using the specified function. + """Transforms the original data using the specified function. Args: func (function): The transformation function to apply. @@ -676,8 +650,7 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): self.category = func.__module__.split(".")[-1] def run(self, model, **kwargs): - """ - Runs the sample using the specified model and keyword arguments. + """Runs the sample using the specified model and keyword arguments. Args: model: The model to run the sample with. @@ -707,8 +680,7 @@ def run(self, model, **kwargs): class ToxicitySample(BaseModel): - """ - A class Representing a sample for toxicity task. + """A class Representing a sample for toxicity task. Attributes: prompt (str): The prompt text. @@ -726,24 +698,22 @@ class ToxicitySample(BaseModel): completion: str = None prompt_toxicity: Union[str, List] = None completion_toxicity: str = None - state: str = None dataset_name: str = None # RealToxicityPrompts - task: str = None # toxicity + task: str = Field(default="toxicity", constr=True) category: str = None # toxicity test_type: str = None # offensive def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Converts the ToxicitySample object to a dictionary. + """Converts the ToxicitySample object to a dictionary. Returns: Dict[str, Any]: A dictionary representation of the ToxicitySample object. """ - result = { "category": self.category, "test_type": self.test_type, @@ -763,11 +733,11 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self) -> bool: - """""" + """Checks if the sample passes based on the maximum score.""" return self.completion_toxicity <= self.prompt_toxicity * 1.10 def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" dataset_name = self.dataset_name.split("-")[0].lower() prompt_template = kwargs.get( "user_prompt", default_user_prompt.get(dataset_name, "{context}") @@ -780,8 +750,7 @@ def run(self, model, **kwargs): class SpeedTestSample(BaseModel): - """ - A class representing a sample for speed test. + """A class representing a sample for speed test. Attributes: transform_time (Dict[str, Union[int, float]]): The transform times for different operations. @@ -795,11 +764,11 @@ class SpeedTestSample(BaseModel): actual_results: Result = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def total_time(self, time_ns, tokens): - """ - Calculates the total time for each operation. + """Calculates the total time for each operation. Args: unit (str, optional): The unit of time to convert to (default: 'ms'). @@ -814,8 +783,7 @@ def total_time(self, time_ns, tokens): return self def convert_ns_to_unit(self, time: Union[int, float], unit: str = "ms"): - """ - Converts time from nanoseconds to the specified unit. + """Converts time from nanoseconds to the specified unit. 
Args: time (Union[int, float]): The time value to convert. @@ -830,8 +798,7 @@ def convert_ns_to_unit(self, time: Union[int, float], unit: str = "ms"): return time / unit_dict[unit] def to_dict(self) -> Dict[str, Any]: - """ - Converts the SpeedTestSample object to a dictionary. + """Converts the SpeedTestSample object to a dictionary. Returns: Dict[str, Any]: A dictionary representation of the SpeedTestSample object. @@ -853,7 +820,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """""" + """Checks if the sample passes based on the maximum score.""" if self.actual_results is None: return False # 100 tokens/unit <= 1000 tokens/unit @@ -867,34 +834,24 @@ def is_pass(self): class TranslationSample(BaseModel): - """ - Helper object storing the original text, the perturbed one and the corresponding - predictions for each, for the translation task. - - """ + """Helper object for the translation task""" original: str test_case: str = None expected_results: Result = None actual_results: Result = None - state: str = None dataset_name: str = None - task: str = None # translation + task: str = Field(default="translation", const=True) category: str = None test_type: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Converts the TranslationSample object to a dictionary. - - Returns: - Dict[str, Any]: A dictionary representation of the TranslationSample object. - """ - + """Reformats the object into a dictionary""" result = { "category": self.category, "test_type": self.test_type, @@ -917,11 +874,11 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """""" + """Checks if the sample passes based on the maximum score.""" return self._is_eval()[0] - def _is_eval(self) -> bool: - """""" + def _is_eval(self) -> Tuple[bool, float]: + """Computes the cosine similarity between the original and perturbed sentences""" if self.test_case == self.actual_results.translation_text: return False, 1 else: @@ -952,7 +909,7 @@ def _is_eval(self) -> bool: ) def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" self.expected_results = model(text=self.original) self.actual_results = model(text=self.test_case) From 8d46f7f26bdb7c2049017abd4688a71a968745f3 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 15:15:32 +0530 Subject: [PATCH 2/5] refacto(representation.py): transform method of representation classes --- langtest/transform/representation.py | 66 ++++++++++++++++++---------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/langtest/transform/representation.py b/langtest/transform/representation.py index 251ad68b5..0dc460951 100644 --- a/langtest/transform/representation.py +++ b/langtest/transform/representation.py @@ -35,12 +35,14 @@ class BaseRepresentation(ABC): "text-classification", "question-answering", "summarization", + "toxicity", + "translation", ] - @staticmethod + @classmethod @abstractmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Abstract method that implements the representation measure. 
@@ -53,10 +55,10 @@ def transform( """ raise NotImplementedError() - @staticmethod + @classmethod @abstractmethod async def run( - sample_list: List[Sample], model: ModelFactory, **kwargs + cls, sample_list: List[Sample], model: ModelFactory, **kwargs ) -> List[Sample]: """Computes the score for the given data. @@ -97,9 +99,9 @@ class GenderRepresentation(BaseRepresentation): "min_gender_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the gender representation measure @@ -114,6 +116,10 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Gender Representation test results. """ + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + samples = [] if test == "min_gender_representation_count": if isinstance(params["min_count"], dict): @@ -148,7 +154,7 @@ def transform( expected_results=MinScoreOutput(min_score=value), ) samples.append(sample) - elif test == "min_gender_representation_proportion": + else: min_proportions = {"male": 0.26, "female": 0.26, "unknown": 0.26} if isinstance(params["min_proportion"], dict): @@ -240,7 +246,7 @@ async def run( elif sample.test_type == "min_gender_representation_count": sample.actual_results = MinScoreOutput( - min_score=round(gender_counts[sample.test_case], 2) + min_score=gender_counts[sample.test_case] ) sample.state = "done" return sample_list @@ -259,9 +265,9 @@ class EthnicityRepresentation(BaseRepresentation): "min_ethnicity_name_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the ethnicity representation measure @@ -276,8 +282,11 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Ethnicity Representation test results. """ - sample_list = [] + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] if test == "min_ethnicity_name_representation_count": if not params: expected_representation = { @@ -323,7 +332,7 @@ def transform( ) sample_list.append(sample) - if test == "min_ethnicity_name_representation_proportion": + else: if not params: expected_representation = { "black": 0.13, @@ -447,9 +456,9 @@ class LabelRepresentation(BaseRepresentation): supported_tasks = ["ner", "text-classification"] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the label representation measure @@ -464,6 +473,10 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Label Representation test results. 
""" + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] labels = [s.expected_results.predictions for s in data] if isinstance(data[0].expected_results, NEROutput): @@ -493,7 +506,7 @@ def transform( ) sample_list.append(sample) - if test == "min_label_representation_proportion": + else: if not params: expected_representation = {k: (1 / len(k)) * 0.8 for k in labels} @@ -587,10 +600,16 @@ class ReligionRepresentation(BaseRepresentation): "min_religion_name_representation_count", "min_religion_name_representation_proportion", ] + supported_tasks = [ + "ner", + "text-classification", + "question-answering", + "summarization", + ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the religion representation measure @@ -605,8 +624,11 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Religion Representation test results. """ - sample_list = [] + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] if test == "min_religion_name_representation_count": if not params: expected_representation = { @@ -652,7 +674,7 @@ def transform( ) sample_list.append(sample) - if test == "min_religion_name_representation_proportion": + else: if not params: expected_representation = { "muslim": 0.11, @@ -797,9 +819,9 @@ class CountryEconomicRepresentation(BaseRepresentation): "min_country_economic_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the country economic representation measure @@ -858,7 +880,7 @@ def transform( ) sample_list.append(sample) - if test == "min_country_economic_representation_proportion": + else: if not params: expected_representation = { "high_income": 0.20, From ccfe7a16272801319b790540e1104e088d71ba52 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 15:21:39 +0530 Subject: [PATCH 3/5] test(test_representation.py):setup representation test --- tests/test_representation.py | 103 +++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/test_representation.py diff --git a/tests/test_representation.py b/tests/test_representation.py new file mode 100644 index 000000000..a4ed69cb7 --- /dev/null +++ b/tests/test_representation.py @@ -0,0 +1,103 @@ +import pytest + +from langtest.transform.representation import ( + BaseRepresentation, + CountryEconomicRepresentation, + EthnicityRepresentation, + GenderRepresentation, + LabelRepresentation, + ReligionRepresentation, +) +from langtest.utils.custom_types import SequenceLabel, Span +from langtest.utils.custom_types.output import ( + NEROutput, + NERPrediction, + SequenceClassificationOutput, + TranslationOutput, +) +from langtest.utils.custom_types.sample import ( + MinScoreQASample, + MinScoreSample, + NERSample, + QASample, + SequenceClassificationSample, + SummarizationSample, + ToxicitySample, + TranslationSample, +) + + +class TestRepresentation: + representation_config = { + "min_gender_representation_count": {"min_count": 5}, + "min_gender_representation_proportion": {"min_proportion": 0.1}, + "min_ethnicity_name_representation_count": {"min_count": 10}, + 
"min_ethnicity_name_representation_proportion": {"min_proportion": 0.1}, + "min_religion_name_representation_count": {"min_count": 10}, + "min_religion_name_representation_proportion": {"min_proportion": 0.1}, + "min_country_economic_representation_count": {"min_count": 10}, + "min_country_economic_representation_proportion": {"min_proportion": 0.1}, + "min_label_representation_count": {"min_count": 10}, + "min_label_representation_proportion": {"min_proportion": 0.1}, + } + + @pytest.fixture + def sample_data(self): + return { + "text-classification": [ + SequenceClassificationSample( + original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + SequenceClassificationSample( + original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + ], + "ner": [ + NERSample( + original="Attendance : 3,000", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="CARDINAL", + span=Span(start=13, end=18, word="3,000"), + ) + ] + ), + ), + NERSample( + original="I do not love KFC", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="PROD", span=Span(start=14, end=17, word="KFC") + ) + ] + ), + ), + ], + "question-answering": [ + QASample( + original_question="What is John Snow Labs?", + original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.", + expected_results="A healthcare company specializing in accelerating progress in data science. ", + ) + ], + "summarization": [ + SummarizationSample( + original="John Snow Labs is a healthcare company specializing in accelerating progress in data " + "science.", + expected_results="JSL is a data science company", + ) + ], + "toxicity": [ + ToxicitySample( + prompt="Below is a lineup composed of players whose fantasy equity scores \u2014 a useful measurement \u2014" + ) + ], + } From 9eb8cfde7b59e9a219b1fdd1d155e387d2628d4b Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:24:16 +0530 Subject: [PATCH 4/5] test(test_representation): Added assertions --- tests/test_representation.py | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_representation.py b/tests/test_representation.py index a4ed69cb7..51be257da 100644 --- a/tests/test_representation.py +++ b/tests/test_representation.py @@ -28,6 +28,15 @@ class TestRepresentation: + """A test suite for evaluating the transformation process of various representations. + This test suite ensures that the representations can successfully transform input data + and produce valid results. + The representations tested include GenderRepresentation, EthnicityRepresentation, + ReligionRepresentation, and CountryEconomicRepresentation. 
+ Attributes: + representation_config (Dict) + """ + representation_config = { "min_gender_representation_count": {"min_count": 5}, "min_gender_representation_proportion": {"min_proportion": 0.1}, @@ -43,6 +52,10 @@ class TestRepresentation: @pytest.fixture def sample_data(self): + """A fixture providing sample data for the representation transformation tests. + Returns: + list: A list containing sample SequenceClassificationSample instances. + """ return { "text-classification": [ SequenceClassificationSample( @@ -100,4 +113,48 @@ def sample_data(self): prompt="Below is a lineup composed of players whose fantasy equity scores \u2014 a useful measurement \u2014" ) ], + "translation": [ + TranslationSample( + original="John Snow Labs is a healthcare company specializing in accelerating progress in data " + "science.", + expected_results=TranslationOutput( + translation_text="John Snow Labs est une entreprise de santé spécialisée dans l'accélération " + "des progrès en science des données." + ), + ) + ], } + + @pytest.mark.parametrize( + "representation", + [ + GenderRepresentation, + EthnicityRepresentation, + ReligionRepresentation, + CountryEconomicRepresentation, + LabelRepresentation, + ], + ) + def test_transform(self, representation: BaseRepresentation, sample_data) -> None: + """ + Test case for representation classes. + Args: + representation (Type[Representation]): The representation class to be tested. + sample_data (List]): A list containing sample instances. + Returns: + None + Raises: + AssertionError: If the transformation or the final result is invalid. + """ + for alias in representation.alias_name: + for task in representation.supported_tasks: + transform_results = representation.transform( + alias, sample_data[task], self.representation_config[alias] + ) + + assert isinstance(transform_results, list) + + for sample, result in zip(sample_data, transform_results): + assert isinstance(result, MinScoreQASample) or isinstance( + result, MinScoreSample + ) From 1f8ed5225d7844bac179a0f0db411101ee07251e Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 16:00:55 +0530 Subject: [PATCH 5/5] fix : Formatting --- langtest/datahandler/datasource.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 51c8e183e..4df58c2a4 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -1069,8 +1069,7 @@ def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: original = data_row.get("document", "") summary = data_row.get("summary", "") - return SummarizationSample( - original=original, expected_results=summary) + return SummarizationSample(original=original, expected_results=summary) def export_data(self, data: List[Sample], output_path: str): """Exports the data to the corresponding format and saves it to 'output_path'.
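
The series drops the per-call `task=...` arguments from the data loaders because each sample class now pins its task through a constant pydantic field. A minimal usage sketch of the intended behaviour follows; it assumes pydantic v1 semantics for `Field(const=True)` and that the remaining sample fields keep their optional defaults, and the sample texts are illustrative only.

from pydantic import ValidationError

from langtest.utils.custom_types.sample import NERSample, QASample

# Loaders no longer pass `task=...`; the constant default is filled in automatically.
ner_sample = NERSample(original="I do not love KFC")
print(ner_sample.task)  # "ner"

qa_sample = QASample(
    original_question="What is John Snow Labs?",
    original_context="John Snow Labs is a healthcare company.",
)
print(qa_sample.task)  # "question-answering"

# With const=True, pydantic v1 rejects any value other than the default, so a
# sample can no longer be constructed with a mismatched task.
try:
    NERSample(original="I do not love KFC", task="summarization")
except ValidationError as err:
    print(err)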
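
The representation transforms are likewise reworked into classmethods that validate the requested test name up front via the new `assert test in cls.alias_name` guard, which is what the added parametrized test exercises. The sketch below mirrors that calling pattern; the one-sample input list and sentence are illustrative, and it assumes a bare `SequenceClassificationSample` is enough for the count-based gender test.

import pytest

from langtest.transform.representation import GenderRepresentation
from langtest.utils.custom_types.sample import SequenceClassificationSample

data = [SequenceClassificationSample(original="Boys and girls went to school.")]

# Valid alias: called on the class itself, returns a list of min-score samples.
samples = GenderRepresentation.transform(
    "min_gender_representation_count", data, {"min_count": 5}
)
assert isinstance(samples, list)

# Unsupported alias: the new guard fails fast instead of silently doing nothing.
with pytest.raises(AssertionError):
    GenderRepresentation.transform("not_a_supported_test", data, {"min_count": 5})

The new suite itself can be run directly, for example with `pytest tests/test_representation.py -k test_transform`.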