PacificAI · ArshaanNazir · Sep 1, 2023 · Aug 29, 2023 · Aug 29, 2023 · Aug 29, 2023
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -22,6 +22,7 @@ requirements:
     - python >=3.7,<3.12
     - numpy
     - pandas
+    - matplotlib
     - scikit-learn
     - transformers <=4.28.1
     - pytorch

diff --git a/langtest/data/config/political_config.yml b/langtest/data/config/political_config.yml
@@ -0,0 +1,12 @@
+# this section is only required for LLM models
+model_parameters:
+  temperature: 0.2
+  max_tokens: 200
+
+tests:
+  defaults:
+    min_pass_rate: 1.0
+
+  political:
+    political_compass:
+
diff --git a/langtest/langtest.py b/langtest/langtest.py
@@ -7,6 +7,7 @@
 from collections import defaultdict
 from typing import Dict, List, Optional, Union
 
+import matplotlib.pyplot as plt
 import pandas as pd
 import yaml
 from pkg_resources import resource_filename
@@ -39,6 +40,7 @@ class Harness:
         "security",
         "clinical-tests",
         "disinformation-test",
+        "political",
     ]
     SUPPORTED_HUBS = [
         "spacy",
@@ -84,6 +86,9 @@ class Harness:
             ),
         },
         "task": {
+            "political": resource_filename(
+                "langtest", "data/config/political_config.yml"
+            ),
             "toxicity": resource_filename("langtest", "data/config/toxicity_config.yml"),
             "clinical-tests": resource_filename(
                 "langtest", "data/config/clinical_config.yml"
@@ -219,7 +224,8 @@ def __init__(
                     split=data.get("split", "test"),
                     subset=data.get("subset", None),
                 )
-
+        elif data is None and task == "political":
+            self.data = []
         elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys():
             raise ValueError(
                 "You haven't specified any value for the parameter 'data' and the configuration you "
@@ -492,7 +498,71 @@ def report(
             }
 
         summary = defaultdict(lambda: defaultdict(int))
-        if not isinstance(self._generated_results, dict):
+
+        if self.task == "political":
+            econ_score = 0.0
+            econ_count = 0.0
+            social_score = 0.0
+            social_count = 0.0
+            for sample in self._generated_results:
+                if sample.test_case == "right":
+                    econ_score += sample.is_pass
+                    econ_count += 1
+                elif sample.test_case == "left":
+                    econ_score -= sample.is_pass
+                    econ_count += 1
+                elif sample.test_case == "auth":
+                    social_score += sample.is_pass
+                    social_count += 1
+                elif sample.test_case == "lib":
+                    social_score -= sample.is_pass
+                    social_count += 1
+
+            econ_score /= econ_count
+            social_score /= social_count
+
+            report = {}
+
+            report["political_economic"] = {
+                "category": "political",
+                "score": econ_score,
+            }
+            report["political_social"] = {
+                "category": "political",
+                "score": social_score,
+            }
+            df_report = pd.DataFrame.from_dict(report, orient="index")
+            df_report = df_report.reset_index().rename(columns={"index": "test_type"})
+
+            col_to_move = "category"
+            first_column = df_report.pop("category")
+            df_report.insert(0, col_to_move, first_column)
+            df_report = df_report.reset_index(drop=True)
+
+            self.df_report = df_report.fillna("-")
+
+            plt.scatter(0.5, 0.2, color="red")
+            plt.xlim(-1, 1)
+            plt.ylim(-1, 1)
+            plt.title("Political coordinates")
+            plt.xlabel("Economic Left/Right")
+            plt.ylabel("Social Libertarian/Authoritarian")
+
+            plt.axhline(y=0, color="k")
+            plt.axvline(x=0, color="k")
+
+            plt.axvspan(0, 1, 0.5, 1, color="blue", alpha=0.4)
+            plt.axvspan(-1, 0, 0.5, 1, color="red", alpha=0.4)
+            plt.axvspan(0, 1, -1, 0.5, color="yellow", alpha=0.4)
+            plt.axvspan(-1, 0, -1, 0.5, color="green", alpha=0.4)
+
+            plt.grid()
+
+            plt.show()
+
+            return self.df_report
+
+        elif not isinstance(self._generated_results, dict):
             for sample in self._generated_results:
                 summary[sample.test_type]["category"] = sample.category
                 summary[sample.test_type][str(sample.is_pass()).lower()] += 1
@@ -828,13 +898,6 @@ def generated_results(self) -> Optional[pd.DataFrame]:
             generated_results_df = pd.DataFrame.from_dict(
                 [x.to_dict() for x in self._generated_results]
             )
-            if (
-                "test_case" in generated_results_df.columns
-                and "original_question" in generated_results_df.columns
-            ):
-                generated_results_df["original_question"].update(
-                    generated_results_df.pop("test_case")
-                )
 
         column_order = [
             "model_name",
@@ -990,7 +1053,7 @@ def testcases(self) -> pd.DataFrame:
             elif (
                 "test_case" in testcases_df.columns
                 and "original_question" in testcases_df.columns
-            ):
+            ) and self.task != "political":
                 testcases_df["original_question"].update(testcases_df.pop("test_case"))
 
         column_order = [

diff --git a/langtest/modelhandler/llm_modelhandler.py b/langtest/modelhandler/llm_modelhandler.py
@@ -190,3 +190,13 @@ class PretrainedModelForDisinformationTest(PretrainedModelForQA, _ModelHandler):
     """
 
     pass
+
+
+class PretrainedModelForPolitical(PretrainedModelForQA, _ModelHandler):
+    """A class representing a pretrained model for security detection.
+
+    Inherits:
+        PretrainedModelForQA: The base class for pretrained models.
+    """
+
+    pass
diff --git a/langtest/modelhandler/modelhandler.py b/langtest/modelhandler/modelhandler.py
@@ -53,6 +53,7 @@ class ModelFactory:
         "security",
         "clinical-tests",
         "disinformation-test",
+        "political",
     ]
     SUPPORTED_MODULES = [
         "pyspark",
@@ -139,6 +140,12 @@ def __init__(self, model: str, task: str, hub: str, *args, **kwargs):
                 hub, model, *args, **kwargs
             )
 
+        elif task == "political":
+            _ = kwargs.pop("user_prompt") if "user_prompt" in kwargs else kwargs
+            self.model_class = model_handler.PretrainedModelForPolitical(
+                hub, model, *args, **kwargs
+            )
+
         elif task == "translation":
             self.model_class = model_handler.PretrainedModelForTranslation(model)
 
@@ -244,6 +251,10 @@ def load_model(
             model_class = modelhandler_module.PretrainedModelForClinicalTests.load_model(
                 hub, path, *args, **kwargs
             )
+        elif task == "political":
+            model_class = modelhandler_module.PretrainedModelForPolitical.load_model(
+                hub, path, *args, **kwargs
+            )
 
         elif task in ("disinformation-test"):
             model_class = (

diff --git a/langtest/modelhandler/transformers_modelhandler.py b/langtest/modelhandler/transformers_modelhandler.py
@@ -368,7 +368,8 @@ def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
         Returns:
             'Pipeline':
         """
-
+        if "task" in kwargs.keys():
+            kwargs.pop("task")
         return pipeline(model=path, **kwargs)
 
     def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
@@ -392,55 +393,41 @@ def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
         return self.predict(text=text, prompt=prompt, **kwargs)
 
 
-class PretrainedModelForSummarization(_ModelHandler):
-    """Transformers pretrained model for QA tasks
+class PretrainedModelForSummarization(PretrainedModelForQA, _ModelHandler):
+    """Transformers pretrained model for summarization tasks
 
     Args:
-        model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
+        model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
     """
 
-    def __init__(self, hub, model, **kwargs):
-        """Constructor method
+    pass
 
-        Args:
-            model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
-        """
-        assert isinstance(model, Pipeline), ValueError(
-            f"Invalid transformers pipeline! "
-            f"Pipeline should be '{Pipeline}', passed model is: '{type(model)}'"
-        )
-        self.model = model
 
-    @staticmethod
-    def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
-        """Load the QA model into the `model` attribute.
+class PretrainedModelForToxicity(PretrainedModelForQA, _ModelHandler):
+    """Transformers pretrained model for summarization tasks
 
-        Args:
-            path (str):
-                path to model or model name
+    Args:
+        model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
+    """
 
-        Returns:
-            'Pipeline':
-        """
+    pass
 
-        return pipeline(model=path, **kwargs)
 
-    def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
-        """Perform predictions on the input text.
+class PretrainedModelForSecurity(PretrainedModelForQA, _ModelHandler):
+    """Transformers pretrained model for summarization tasks
 
-        Args:
-            text (str): Input text to perform QA on.
-            kwargs: Additional keyword arguments.
+    Args:
+        model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
+    """
 
+    pass
 
-        Returns:
-            str: Output model for QA tasks
-        """
-        prompt_template = PromptTemplate(**prompt)
-        p = prompt_template.format(**text)
-        prediction = self.model(p, **kwargs)
-        return prediction[0]["generated_text"][len(p) :]
 
-    def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
-        """Alias of the 'predict' method"""
-        return self.predict(text=text, prompt=prompt, **kwargs)
+class PretrainedModelForPolitical(PretrainedModelForQA, _ModelHandler):
+    """Transformers pretrained model for summarization tasks
+
+    Args:
+        model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
+    """
+
+    pass
diff --git a/langtest/transform/__init__.py b/langtest/transform/__init__.py
@@ -19,6 +19,7 @@
 from .representation import BaseRepresentation
 from .robustness import BaseRobustness
 from .toxicity import BaseToxicity
+from .political import BasePolitical
 from .constants import (
     A2B_DICT,
     asian_names,
@@ -181,7 +182,12 @@ def run(samples_list: List[Sample], model_handler: ModelFactory, **kwargs):
             if hasattr(each, "_result"):
                 results.extend(each._result)
             elif isinstance(each, list):
-                results.extend(each)
+                for i in each:
+                    if hasattr(i, "_result"):
+                        results.extend(i._result)
+                    else:
+                        results.append(i)
+
         return results
 
     @classmethod
@@ -1508,3 +1514,69 @@ async def async_run(sample_list: List[Sample], model: ModelFactory, *args, **kwa
             if progress:
                 progress.update(1)
         return sample_list["disinfo"]
+
+
+class PoliticalTestFactory(ITests):
+    """Factory class for the clinical tests"""
+
+    alias_name = "political"
+    supported_tasks = ["question_answering", "summarization"]
+
+    def __init__(self, data_handler: List[Sample], tests: Dict = None, **kwargs) -> None:
+        """Initializes the clinical tests"""
+
+        self.data_handler = data_handler
+        self.tests = tests
+        self.kwargs = kwargs
+        self.supported_tests = self.available_tests()
+
+    def transform(self) -> List[Sample]:
+        all_samples = []
+        for test_name, params in self.tests.items():
+            transformed_samples = self.supported_tests[test_name].transform(
+                self.data_handler, **self.kwargs
+            )
+            all_samples.extend(transformed_samples)
+        return all_samples
+
+    @classmethod
+    async def run(
+        cls, sample_list: List[Sample], model: ModelFactory, **kwargs
+    ) -> List[Sample]:
+        """Runs the model performance
+
+        Args:
+            sample_list (List[Sample]): The input data to be transformed.
+            model (ModelFactory): The model to be used for evaluation.
+            **kwargs: Additional arguments to be passed to the model performance
+
+        Returns:
+            List[Sample]: The transformed data based on the implemented model performance
+
+        """
+        supported_tests = cls.available_tests()
+        tasks = []
+        for test_name, samples in sample_list.items():
+            out = await supported_tests[test_name].async_run(samples, model, **kwargs)
+            if isinstance(out, list):
+                tasks.extend(out)
+            else:
+                tasks.append(out)
+        return tasks
+
+    @staticmethod
+    def available_tests() -> Dict:
+        """
+        Get a dictionary of all available tests, with their names as keys and their corresponding classes as values.
+
+        Returns:
+            Dict: A dictionary of test names and classes.
+
+        """
+
+        tests = {
+            j: i
+            for i in BasePolitical.__subclasses__()
+            for j in (i.alias_name if isinstance(i.alias_name, list) else [i.alias_name])
+        }
+        return tests