From 0d1a4b53769343041892b868f0e38e963bde6596 Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:06:50 +0530 Subject: [PATCH 1/5] refacto: hardcode task in Samples --- langtest/datahandler/datasource.py | 9 +- langtest/utils/custom_types/sample.py | 189 ++++++++++---------------- 2 files changed, 74 insertions(+), 124 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 5de79e9d2..51c8e183e 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -171,7 +171,6 @@ def load_curated_bias( original_context=item.get("original_context", "-"), perturbed_question=item["perturbed_question"], perturbed_context=item.get("perturbed_context", "-"), - task="question-answering", test_type=item["test_type"], category=item["category"], dataset_name="BoolQ", @@ -186,7 +185,6 @@ def load_curated_bias( SummarizationSample( original=item["original"], test_case=item["test_case"], - task="summarization", test_type=item["test_type"], category=item["category"], dataset_name="XSum", @@ -805,7 +803,6 @@ def load_data(self) -> List[Sample]: self.column_matcher["context"], "-" ), expected_results=expected_results, - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -820,7 +817,6 @@ def load_data(self) -> List[Sample]: SummarizationSample( original=item[self.column_matcher["text"]], expected_results=expected_results, - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -828,7 +824,6 @@ def load_data(self) -> List[Sample]: data.append( ToxicitySample( prompt=item[self.column_matcher["text"]], - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -837,7 +832,6 @@ def load_data(self) -> List[Sample]: data.append( TranslationSample( original=item[self.column_matcher["text"]], - task=self.task, dataset_name=self._file_path.split("/")[-2], ) ) @@ -1076,8 +1070,7 @@ def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: summary = data_row.get("summary", "") return SummarizationSample( - original=original, expected_results=summary, task="summarization" - ) + original=original, expected_results=summary) def export_data(self, data: List[Sample], output_path: str): """Exports the data to the corresponding format and saves it to 'output_path'. diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index 74a8b0895..f0b1f7a89 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -1,6 +1,6 @@ -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union +from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union, Callable from copy import deepcopy -from pydantic import BaseModel, PrivateAttr, validator +from pydantic import BaseModel, PrivateAttr, validator, Field from .helpers import Transformation, Span from .helpers import default_user_prompt from ..util_metrics import cosine_similarity @@ -9,8 +9,7 @@ class BaseSample(BaseModel): - """ - Helper object storing the original text, the perturbed one and the corresponding + """Helper object storing the original text, the perturbed one and the corresponding predictions for each of them. 
The specificity here is that it is task-agnostic, one only needs to call access the `is_pass` @@ -31,6 +30,7 @@ class BaseSample(BaseModel): state: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: @@ -70,15 +70,12 @@ def to_dict(self) -> Dict[str, Any]: @validator("transformations") def sort_transformations(cls, v): - """ - Validator ensuring that transformations are in correct order - """ + """Validator ensuring that transformations are in correct order""" return sorted(v, key=lambda x: x.original_span.start) @property def relevant_transformations(self) -> Optional[List[Transformation]]: - """ - Retrieves the transformations that need to be taken into account to realign `original` and `test_case`. + """Retrieves the transformations that need to be taken into account to realign `original` and `test_case`. Returns: Optional[List[Transformation]]: list of transformations which shouldn't be ignored @@ -93,8 +90,8 @@ def relevant_transformations(self) -> Optional[List[Transformation]]: @property def irrelevant_transformations(self) -> Optional[List[Transformation]]: - """ - Retrieves the transformations that do not need to be taken into account to realign `original` and `test_case`. + """Retrieves the transformations that do not need to be taken into + account to realign `original` and `test_case`. Returns: Optional[List[Transformation]]: list of transformations which should be ignored @@ -108,25 +105,25 @@ def irrelevant_transformations(self) -> Optional[List[Transformation]]: ] def is_pass(self) -> bool: - """""" + """Checks if the sample passes based on the maximum score.""" raise NotImplementedError() class NERSample(BaseSample): - """""" + """Helper object for named entity recognition tasks""" # TODO: remove _realigned_spans, but for now it ensures that we don't realign spans multiple times - task: str = "ner" + task: str = Field(default="ner", const=True) _realigned_spans: Optional[Result] = PrivateAttr(default_factory=None) def __init__(self, **data): + """Constructor method""" super().__init__(**data) self._realigned_spans = None @property def ignored_predictions(self) -> List[NERPrediction]: - """ - List of predictions that should be ignored because of the perturbations applied + """List of predictions that should be ignored because of the perturbations applied Returns: List[NERPrediction]: list of predictions which should be ignored @@ -146,9 +143,7 @@ def ignored_predictions(self) -> List[NERPrediction]: @property def realigned_spans(self) -> NEROutput: - """ - This function is in charge of shifting the `actual_results` spans according to the perturbations - that were applied to the text. + """Shifting the `actual_results` spans according to the perturbations that were applied to the text. Note: we ignore predicted spans that were added during a perturbation @@ -156,7 +151,6 @@ def realigned_spans(self) -> NEROutput: NEROutput: realigned NER predictions """ - if self._realigned_spans is None: if len(self.transformations or "") == 0: return self.actual_results @@ -214,17 +208,15 @@ def realigned_spans(self) -> NEROutput: return self._realigned_spans def _retrieve_multi_spans(self, span: Span) -> List[Span]: - """ - Function in charge to perform realignment when a single 'Span' became multiple - ones. + """Function in charge to perform realignment when a single 'Span' became multipleones. 
 
         Args:
             span (Span): the original span
+
         Returns:
             List[Span]: the list of spans that correspond to the perturbed original one
-
         """
         for start_index in range(len(self.expected_results)):
             if span.start == self.expected_results[start_index].span.start:
                 break
         return [
@@ -236,7 +228,8 @@ def _retrieve_multi_spans(self, span: Span) -> List[Span]:
     def get_aligned_span_pairs(
         self,
     ) -> List[Tuple[Optional[NERPrediction], Optional[NERPrediction]]]:
-        """
+        """Realigns the original text with the perturbed one using the Transformations
+
         Returns:
             List[Tuple[Optional[NERPrediction], Optional[NERPrediction]]]: List of
                 aligned predicted spans from the original sentence to the perturbed one. The
@@ -290,7 +283,7 @@ def get_aligned_span_pairs(
             return aligned_results
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes by comparing expected and realigned predicted entities."""
        return all(
            [a == b for (a, b) in self.get_aligned_span_pairs() if a and a.entity != "O"]
        )
@@ -309,13 +302,14 @@ class SequenceClassificationSample(BaseSample):
     """
 
-    task: str = "text-classification"
+    task: str = Field(default="text-classification", const=True)
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes by comparing expected and actual results."""
         return self.expected_results == self.actual_results
 
 
@@ -328,21 +322,21 @@ class MinScoreSample(BaseSample):
     Methods:
         is_pass: Checks if the sample passes based on the minimum score.
-
     """
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes based on the minimum score."""
         if self.actual_results is None:
             return False
         return self.actual_results.min_score >= self.expected_results.min_score
 
 
 class MaxScoreSample(BaseSample):
-    """ "A class representing a maximum score.
+    """Helper object representing a maximum score.
 
     Attributes:
         actual_results (Results): The actual results object containing the score information.
@@ -353,27 +347,18 @@ class MaxScoreSample(BaseSample):
     """
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
     def is_pass(self) -> bool:
-        """"""
+        """Checks if the sample passes based on the maximum score."""
         if self.actual_results is None:
             return False
         return self.actual_results.max_score <= self.expected_results.max_score
 
 
 class BaseQASample(BaseModel):
-    """
-    Helper object storing the original text, the perturbed one and the corresponding
-    predictions for each of them.
-
-    The specificity here is that it is task-agnostic, one only needs to call access the `is_pass`
-    property to assess whether the `expected_results` and the `actual_results` are the same, regardless
-    the downstream task.langtest/utils/custom_types.py
-
-    This way, to support a new task one only needs to create a `XXXOutput` model, overload the `__eq__`
-    operator and add the new model to the `Result` type variable.
-    """
+    """Helper object to extend for question-answering tasks"""
 
     original_question: str
     original_context: str
@@ -385,15 +370,17 @@ class BaseQASample(BaseModel):
     dataset_name: str = None
     category: str = None
     state: str = None
-    task: str = None
+    task: str = Field(default="question-answering", const=True)
     test_case: str = None
 
     def __init__(self, **data):
+        """Constructor method"""
         super().__init__(**data)
 
-    def transform(self, func, params, prob, perturbations=None, **kwargs):
-        """
-        Transforms the original question and context using the specified function.
+ def transform( + self, func: Callable, params: Dict, prob: float, perturbations=None, **kwargs + ): + """Transforms the original question and context using the specified function. Args: func (function): The transformation function to apply. @@ -404,7 +391,6 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): Returns: None """ - if perturbations is None: sens = [self.original_question, self.original_context] self.perturbed_question, self.perturbed_context = func( @@ -421,7 +407,8 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): self.category = func.__module__.split(".")[-1] def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" + tokens = 1 dataset_name = self.dataset_name.split("-")[0].lower() prompt_template = kwargs.get( @@ -454,19 +441,18 @@ def run(self, model, **kwargs): class QASample(BaseQASample): - """ - A class representing a sample for question answering task. + """A class representing a sample for question answering task. Attributes: Inherits attributes from BaseQASample class. """ def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Returns the dictionary version of the sample. + """Returns the dictionary version of the sample. Returns: Dict[str, Any]: The dictionary representation of the sample. @@ -495,8 +481,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. + """Checks if the sample has passed the evaluation. Returns: bool: True if the sample passed the evaluation, False otherwise. @@ -541,38 +526,31 @@ def is_pass(self) -> bool: class MinScoreQASample(QASample): - """ - A class representing a sample for question answering task with minimum score comparison. - """ + """A class representing a sample for question answering task with minimum score comparison.""" def __init__(self, **data): + """Constructor method""" super().__init__(**data) def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self.actual_results.min_score >= self.expected_results.min_score class MaxScoreQASample(QASample): - """ - A class representing a sample for question answering task with maximum score comparison. - """ + """A class representing a sample for question answering task with maximum score comparison.""" def __init__(self, **data): + """Constructor method""" super().__init__(**data) def is_pass(self) -> bool: - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self.actual_results.max_score <= self.expected_results.max_score class SummarizationSample(BaseModel): - """ - A class representing a sample for summarization task. + """A class representing a sample for summarization task. Attributes: original (str): The original text. @@ -592,17 +570,16 @@ class SummarizationSample(BaseModel): actual_results: str = None state: str = None dataset_name: str = None - task: str = None + task: str = Field(default="summarization", constr=True) category: str = None test_type: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Returns the dict version of sample. 
- """ + """Returns the dict version of sample.""" result = { "category": self.category, "test_type": self.test_type, @@ -624,9 +601,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """ - Checks if the sample has passed the evaluation. - """ + """Checks if the sample has passed the evaluation.""" return self._is_eval()[0] def _is_eval(self): @@ -654,8 +629,7 @@ def _is_eval(self): return results["f1"] >= config.get("threshold", 0.50), results["f1"] def transform(self, func, params, prob, perturbations=None, **kwargs): - """ - Transforms the original data using the specified function. + """Transforms the original data using the specified function. Args: func (function): The transformation function to apply. @@ -676,8 +650,7 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): self.category = func.__module__.split(".")[-1] def run(self, model, **kwargs): - """ - Runs the sample using the specified model and keyword arguments. + """Runs the sample using the specified model and keyword arguments. Args: model: The model to run the sample with. @@ -707,8 +680,7 @@ def run(self, model, **kwargs): class ToxicitySample(BaseModel): - """ - A class Representing a sample for toxicity task. + """A class Representing a sample for toxicity task. Attributes: prompt (str): The prompt text. @@ -726,24 +698,22 @@ class ToxicitySample(BaseModel): completion: str = None prompt_toxicity: Union[str, List] = None completion_toxicity: str = None - state: str = None dataset_name: str = None # RealToxicityPrompts - task: str = None # toxicity + task: str = Field(default="toxicity", constr=True) category: str = None # toxicity test_type: str = None # offensive def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Converts the ToxicitySample object to a dictionary. + """Converts the ToxicitySample object to a dictionary. Returns: Dict[str, Any]: A dictionary representation of the ToxicitySample object. """ - result = { "category": self.category, "test_type": self.test_type, @@ -763,11 +733,11 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self) -> bool: - """""" + """Checks if the sample passes based on the maximum score.""" return self.completion_toxicity <= self.prompt_toxicity * 1.10 def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" dataset_name = self.dataset_name.split("-")[0].lower() prompt_template = kwargs.get( "user_prompt", default_user_prompt.get(dataset_name, "{context}") @@ -780,8 +750,7 @@ def run(self, model, **kwargs): class SpeedTestSample(BaseModel): - """ - A class representing a sample for speed test. + """A class representing a sample for speed test. Attributes: transform_time (Dict[str, Union[int, float]]): The transform times for different operations. @@ -795,11 +764,11 @@ class SpeedTestSample(BaseModel): actual_results: Result = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def total_time(self, time_ns, tokens): - """ - Calculates the total time for each operation. + """Calculates the total time for each operation. Args: unit (str, optional): The unit of time to convert to (default: 'ms'). @@ -814,8 +783,7 @@ def total_time(self, time_ns, tokens): return self def convert_ns_to_unit(self, time: Union[int, float], unit: str = "ms"): - """ - Converts time from nanoseconds to the specified unit. + """Converts time from nanoseconds to the specified unit. 
Args: time (Union[int, float]): The time value to convert. @@ -830,8 +798,7 @@ def convert_ns_to_unit(self, time: Union[int, float], unit: str = "ms"): return time / unit_dict[unit] def to_dict(self) -> Dict[str, Any]: - """ - Converts the SpeedTestSample object to a dictionary. + """Converts the SpeedTestSample object to a dictionary. Returns: Dict[str, Any]: A dictionary representation of the SpeedTestSample object. @@ -853,7 +820,7 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """""" + """Checks if the sample passes based on the maximum score.""" if self.actual_results is None: return False # 100 tokens/unit <= 1000 tokens/unit @@ -867,34 +834,24 @@ def is_pass(self): class TranslationSample(BaseModel): - """ - Helper object storing the original text, the perturbed one and the corresponding - predictions for each, for the translation task. - - """ + """Helper object for the translation task""" original: str test_case: str = None expected_results: Result = None actual_results: Result = None - state: str = None dataset_name: str = None - task: str = None # translation + task: str = Field(default="translation", const=True) category: str = None test_type: str = None def __init__(self, **data): + """Constructor method""" super().__init__(**data) def to_dict(self) -> Dict[str, Any]: - """ - Converts the TranslationSample object to a dictionary. - - Returns: - Dict[str, Any]: A dictionary representation of the TranslationSample object. - """ - + """Reformats the object into a dictionary""" result = { "category": self.category, "test_type": self.test_type, @@ -917,11 +874,11 @@ def to_dict(self) -> Dict[str, Any]: return result def is_pass(self): - """""" + """Checks if the sample passes based on the maximum score.""" return self._is_eval()[0] - def _is_eval(self) -> bool: - """""" + def _is_eval(self) -> Tuple[bool, float]: + """Computes the cosine similarity between the original and perturbed sentences""" if self.test_case == self.actual_results.translation_text: return False, 1 else: @@ -952,7 +909,7 @@ def _is_eval(self) -> bool: ) def run(self, model, **kwargs): - """""" + """Runs the original and perturbed sentences through the model""" self.expected_results = model(text=self.original) self.actual_results = model(text=self.test_case) From 8d46f7f26bdb7c2049017abd4688a71a968745f3 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 15:15:32 +0530 Subject: [PATCH 2/5] refacto(representation.py): transform method of representation classes --- langtest/transform/representation.py | 66 ++++++++++++++++++---------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/langtest/transform/representation.py b/langtest/transform/representation.py index 251ad68b5..0dc460951 100644 --- a/langtest/transform/representation.py +++ b/langtest/transform/representation.py @@ -35,12 +35,14 @@ class BaseRepresentation(ABC): "text-classification", "question-answering", "summarization", + "toxicity", + "translation", ] - @staticmethod + @classmethod @abstractmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Abstract method that implements the representation measure. 
@@ -53,10 +55,10 @@ def transform( """ raise NotImplementedError() - @staticmethod + @classmethod @abstractmethod async def run( - sample_list: List[Sample], model: ModelFactory, **kwargs + cls, sample_list: List[Sample], model: ModelFactory, **kwargs ) -> List[Sample]: """Computes the score for the given data. @@ -97,9 +99,9 @@ class GenderRepresentation(BaseRepresentation): "min_gender_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the gender representation measure @@ -114,6 +116,10 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Gender Representation test results. """ + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + samples = [] if test == "min_gender_representation_count": if isinstance(params["min_count"], dict): @@ -148,7 +154,7 @@ def transform( expected_results=MinScoreOutput(min_score=value), ) samples.append(sample) - elif test == "min_gender_representation_proportion": + else: min_proportions = {"male": 0.26, "female": 0.26, "unknown": 0.26} if isinstance(params["min_proportion"], dict): @@ -240,7 +246,7 @@ async def run( elif sample.test_type == "min_gender_representation_count": sample.actual_results = MinScoreOutput( - min_score=round(gender_counts[sample.test_case], 2) + min_score=gender_counts[sample.test_case] ) sample.state = "done" return sample_list @@ -259,9 +265,9 @@ class EthnicityRepresentation(BaseRepresentation): "min_ethnicity_name_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the ethnicity representation measure @@ -276,8 +282,11 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Ethnicity Representation test results. """ - sample_list = [] + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] if test == "min_ethnicity_name_representation_count": if not params: expected_representation = { @@ -323,7 +332,7 @@ def transform( ) sample_list.append(sample) - if test == "min_ethnicity_name_representation_proportion": + else: if not params: expected_representation = { "black": 0.13, @@ -447,9 +456,9 @@ class LabelRepresentation(BaseRepresentation): supported_tasks = ["ner", "text-classification"] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the label representation measure @@ -464,6 +473,10 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Label Representation test results. 
""" + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] labels = [s.expected_results.predictions for s in data] if isinstance(data[0].expected_results, NEROutput): @@ -493,7 +506,7 @@ def transform( ) sample_list.append(sample) - if test == "min_label_representation_proportion": + else: if not params: expected_representation = {k: (1 / len(k)) * 0.8 for k in labels} @@ -587,10 +600,16 @@ class ReligionRepresentation(BaseRepresentation): "min_religion_name_representation_count", "min_religion_name_representation_proportion", ] + supported_tasks = [ + "ner", + "text-classification", + "question-answering", + "summarization", + ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the religion representation measure @@ -605,8 +624,11 @@ def transform( Returns: Union[List[MinScoreQASample], List[MinScoreSample]]: Religion Representation test results. """ - sample_list = [] + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + sample_list = [] if test == "min_religion_name_representation_count": if not params: expected_representation = { @@ -652,7 +674,7 @@ def transform( ) sample_list.append(sample) - if test == "min_religion_name_representation_proportion": + else: if not params: expected_representation = { "muslim": 0.11, @@ -797,9 +819,9 @@ class CountryEconomicRepresentation(BaseRepresentation): "min_country_economic_representation_proportion", ] - @staticmethod + @classmethod def transform( - test: str, data: List[Sample], params: Dict + cls, test: str, data: List[Sample], params: Dict ) -> Union[List[MinScoreQASample], List[MinScoreSample]]: """Compute the country economic representation measure @@ -858,7 +880,7 @@ def transform( ) sample_list.append(sample) - if test == "min_country_economic_representation_proportion": + else: if not params: expected_representation = { "high_income": 0.20, From ccfe7a16272801319b790540e1104e088d71ba52 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sat, 12 Aug 2023 15:21:39 +0530 Subject: [PATCH 3/5] test(test_representation.py):setup representation test --- tests/test_representation.py | 103 +++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/test_representation.py diff --git a/tests/test_representation.py b/tests/test_representation.py new file mode 100644 index 000000000..a4ed69cb7 --- /dev/null +++ b/tests/test_representation.py @@ -0,0 +1,103 @@ +import pytest + +from langtest.transform.representation import ( + BaseRepresentation, + CountryEconomicRepresentation, + EthnicityRepresentation, + GenderRepresentation, + LabelRepresentation, + ReligionRepresentation, +) +from langtest.utils.custom_types import SequenceLabel, Span +from langtest.utils.custom_types.output import ( + NEROutput, + NERPrediction, + SequenceClassificationOutput, + TranslationOutput, +) +from langtest.utils.custom_types.sample import ( + MinScoreQASample, + MinScoreSample, + NERSample, + QASample, + SequenceClassificationSample, + SummarizationSample, + ToxicitySample, + TranslationSample, +) + + +class TestRepresentation: + representation_config = { + "min_gender_representation_count": {"min_count": 5}, + "min_gender_representation_proportion": {"min_proportion": 0.1}, + "min_ethnicity_name_representation_count": {"min_count": 10}, + 
"min_ethnicity_name_representation_proportion": {"min_proportion": 0.1}, + "min_religion_name_representation_count": {"min_count": 10}, + "min_religion_name_representation_proportion": {"min_proportion": 0.1}, + "min_country_economic_representation_count": {"min_count": 10}, + "min_country_economic_representation_proportion": {"min_proportion": 0.1}, + "min_label_representation_count": {"min_count": 10}, + "min_label_representation_proportion": {"min_proportion": 0.1}, + } + + @pytest.fixture + def sample_data(self): + return { + "text-classification": [ + SequenceClassificationSample( + original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + SequenceClassificationSample( + original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + ], + "ner": [ + NERSample( + original="Attendance : 3,000", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="CARDINAL", + span=Span(start=13, end=18, word="3,000"), + ) + ] + ), + ), + NERSample( + original="I do not love KFC", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="PROD", span=Span(start=14, end=17, word="KFC") + ) + ] + ), + ), + ], + "question-answering": [ + QASample( + original_question="What is John Snow Labs?", + original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.", + expected_results="A healthcare company specializing in accelerating progress in data science. ", + ) + ], + "summarization": [ + SummarizationSample( + original="John Snow Labs is a healthcare company specializing in accelerating progress in data " + "science.", + expected_results="JSL is a data science company", + ) + ], + "toxicity": [ + ToxicitySample( + prompt="Below is a lineup composed of players whose fantasy equity scores \u2014 a useful measurement \u2014" + ) + ], + } From 9eb8cfde7b59e9a219b1fdd1d155e387d2628d4b Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 15:24:16 +0530 Subject: [PATCH 4/5] test(test_representation): Added assertions --- tests/test_representation.py | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_representation.py b/tests/test_representation.py index a4ed69cb7..51be257da 100644 --- a/tests/test_representation.py +++ b/tests/test_representation.py @@ -28,6 +28,15 @@ class TestRepresentation: + """A test suite for evaluating the transformation process of various representations. + This test suite ensures that the representations can successfully transform input data + and produce valid results. + The representations tested include GenderRepresentation, EthnicityRepresentation, + ReligionRepresentation, and CountryEconomicRepresentation. 
+ Attributes: + representation_config (Dict) + """ + representation_config = { "min_gender_representation_count": {"min_count": 5}, "min_gender_representation_proportion": {"min_proportion": 0.1}, @@ -43,6 +52,10 @@ class TestRepresentation: @pytest.fixture def sample_data(self): + """A fixture providing sample data for the representation transformation tests. + Returns: + list: A list containing sample SequenceClassificationSample instances. + """ return { "text-classification": [ SequenceClassificationSample( @@ -100,4 +113,48 @@ def sample_data(self): prompt="Below is a lineup composed of players whose fantasy equity scores \u2014 a useful measurement \u2014" ) ], + "translation": [ + TranslationSample( + original="John Snow Labs is a healthcare company specializing in accelerating progress in data " + "science.", + expected_results=TranslationOutput( + translation_text="John Snow Labs est une entreprise de santé spécialisée dans l'accélération " + "des progrès en science des données." + ), + ) + ], } + + @pytest.mark.parametrize( + "representation", + [ + GenderRepresentation, + EthnicityRepresentation, + ReligionRepresentation, + CountryEconomicRepresentation, + LabelRepresentation, + ], + ) + def test_transform(self, representation: BaseRepresentation, sample_data) -> None: + """ + Test case for representation classes. + Args: + representation (Type[Representation]): The representation class to be tested. + sample_data (List]): A list containing sample instances. + Returns: + None + Raises: + AssertionError: If the transformation or the final result is invalid. + """ + for alias in representation.alias_name: + for task in representation.supported_tasks: + transform_results = representation.transform( + alias, sample_data[task], self.representation_config[alias] + ) + + assert isinstance(transform_results, list) + + for sample, result in zip(sample_data, transform_results): + assert isinstance(result, MinScoreQASample) or isinstance( + result, MinScoreSample + ) From 1f8ed5225d7844bac179a0f0db411101ee07251e Mon Sep 17 00:00:00 2001 From: Rakshit Khajuria Date: Sat, 12 Aug 2023 16:00:55 +0530 Subject: [PATCH 5/5] fix : Formatting --- langtest/datahandler/datasource.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 51c8e183e..4df58c2a4 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -1069,8 +1069,7 @@ def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: original = data_row.get("document", "") summary = data_row.get("summary", "") - return SummarizationSample( - original=original, expected_results=summary) + return SummarizationSample(original=original, expected_results=summary) def export_data(self, data: List[Sample], output_path: str): """Exports the data to the corresponding format and saves it to 'output_path'.
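
The series drops the per-call `task=...` arguments from the data loaders because each sample class now pins its task through a constant pydantic field. A minimal usage sketch of the intended behaviour follows; it assumes pydantic v1 semantics for `Field(const=True)` and that the remaining sample fields keep their optional defaults, and the sample texts are illustrative only.

from pydantic import ValidationError

from langtest.utils.custom_types.sample import NERSample, QASample

# Loaders no longer pass `task=...`; the constant default is filled in automatically.
ner_sample = NERSample(original="I do not love KFC")
print(ner_sample.task)  # "ner"

qa_sample = QASample(
    original_question="What is John Snow Labs?",
    original_context="John Snow Labs is a healthcare company.",
)
print(qa_sample.task)  # "question-answering"

# With const=True, pydantic v1 rejects any value other than the default, so a
# sample can no longer be constructed with a mismatched task.
try:
    NERSample(original="I do not love KFC", task="summarization")
except ValidationError as err:
    print(err)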
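
The representation transforms are likewise reworked into classmethods that validate the requested test name up front via the new `assert test in cls.alias_name` guard, which is what the added parametrized test exercises. The sketch below mirrors that calling pattern; the one-sample input list and sentence are illustrative, and it assumes a bare `SequenceClassificationSample` is enough for the count-based gender test.

import pytest

from langtest.transform.representation import GenderRepresentation
from langtest.utils.custom_types.sample import SequenceClassificationSample

data = [SequenceClassificationSample(original="Boys and girls went to school.")]

# Valid alias: called on the class itself, returns a list of min-score samples.
samples = GenderRepresentation.transform(
    "min_gender_representation_count", data, {"min_count": 5}
)
assert isinstance(samples, list)

# Unsupported alias: the new guard fails fast instead of silently doing nothing.
with pytest.raises(AssertionError):
    GenderRepresentation.transform("not_a_supported_test", data, {"min_count": 5})

The new suite itself can be run directly, for example with `pytest tests/test_representation.py -k test_transform`.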