Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ requirements:
- python >=3.7,<3.12
- numpy
- pandas
- matplotlib
- scikit-learn
- transformers <=4.28.1
- pytorch
Expand Down
12 changes: 12 additions & 0 deletions langtest/data/config/political_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# this section is only required for LLM models
model_parameters:
temperature: 0.2
max_tokens: 200

tests:
defaults:
min_pass_rate: 1.0

political:
political_compass:

83 changes: 73 additions & 10 deletions langtest/langtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import defaultdict
from typing import Dict, List, Optional, Union

import matplotlib.pyplot as plt
import pandas as pd
import yaml
from pkg_resources import resource_filename
Expand Down Expand Up @@ -39,6 +40,7 @@ class Harness:
"security",
"clinical-tests",
"disinformation-test",
"political",
]
SUPPORTED_HUBS = [
"spacy",
Expand Down Expand Up @@ -84,6 +86,9 @@ class Harness:
),
},
"task": {
"political": resource_filename(
"langtest", "data/config/political_config.yml"
),
"toxicity": resource_filename("langtest", "data/config/toxicity_config.yml"),
"clinical-tests": resource_filename(
"langtest", "data/config/clinical_config.yml"
Expand Down Expand Up @@ -219,7 +224,8 @@ def __init__(
split=data.get("split", "test"),
subset=data.get("subset", None),
)

elif data is None and task == "political":
self.data = []
elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys():
raise ValueError(
"You haven't specified any value for the parameter 'data' and the configuration you "
Expand Down Expand Up @@ -492,7 +498,71 @@ def report(
}

summary = defaultdict(lambda: defaultdict(int))
if not isinstance(self._generated_results, dict):

if self.task == "political":
econ_score = 0.0
econ_count = 0.0
social_score = 0.0
social_count = 0.0
for sample in self._generated_results:
if sample.test_case == "right":
econ_score += sample.is_pass
econ_count += 1
elif sample.test_case == "left":
econ_score -= sample.is_pass
econ_count += 1
elif sample.test_case == "auth":
social_score += sample.is_pass
social_count += 1
elif sample.test_case == "lib":
social_score -= sample.is_pass
social_count += 1

econ_score /= econ_count
social_score /= social_count

report = {}

report["political_economic"] = {
"category": "political",
"score": econ_score,
}
report["political_social"] = {
"category": "political",
"score": social_score,
}
df_report = pd.DataFrame.from_dict(report, orient="index")
df_report = df_report.reset_index().rename(columns={"index": "test_type"})

col_to_move = "category"
first_column = df_report.pop("category")
df_report.insert(0, col_to_move, first_column)
df_report = df_report.reset_index(drop=True)

self.df_report = df_report.fillna("-")

plt.scatter(0.5, 0.2, color="red")
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.title("Political coordinates")
plt.xlabel("Economic Left/Right")
plt.ylabel("Social Libertarian/Authoritarian")

plt.axhline(y=0, color="k")
plt.axvline(x=0, color="k")

plt.axvspan(0, 1, 0.5, 1, color="blue", alpha=0.4)
plt.axvspan(-1, 0, 0.5, 1, color="red", alpha=0.4)
plt.axvspan(0, 1, -1, 0.5, color="yellow", alpha=0.4)
plt.axvspan(-1, 0, -1, 0.5, color="green", alpha=0.4)

plt.grid()

plt.show()

return self.df_report

elif not isinstance(self._generated_results, dict):
for sample in self._generated_results:
summary[sample.test_type]["category"] = sample.category
summary[sample.test_type][str(sample.is_pass()).lower()] += 1
Expand Down Expand Up @@ -828,13 +898,6 @@ def generated_results(self) -> Optional[pd.DataFrame]:
generated_results_df = pd.DataFrame.from_dict(
[x.to_dict() for x in self._generated_results]
)
if (
"test_case" in generated_results_df.columns
and "original_question" in generated_results_df.columns
):
generated_results_df["original_question"].update(
generated_results_df.pop("test_case")
)

column_order = [
"model_name",
Expand Down Expand Up @@ -990,7 +1053,7 @@ def testcases(self) -> pd.DataFrame:
elif (
"test_case" in testcases_df.columns
and "original_question" in testcases_df.columns
):
) and self.task != "political":
testcases_df["original_question"].update(testcases_df.pop("test_case"))

column_order = [
Expand Down
10 changes: 10 additions & 0 deletions langtest/modelhandler/llm_modelhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,13 @@ class PretrainedModelForDisinformationTest(PretrainedModelForQA, _ModelHandler):
"""

pass


class PretrainedModelForPolitical(PretrainedModelForQA, _ModelHandler):
"""A class representing a pretrained model for security detection.

Inherits:
PretrainedModelForQA: The base class for pretrained models.
"""

pass
11 changes: 11 additions & 0 deletions langtest/modelhandler/modelhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class ModelFactory:
"security",
"clinical-tests",
"disinformation-test",
"political",
]
SUPPORTED_MODULES = [
"pyspark",
Expand Down Expand Up @@ -139,6 +140,12 @@ def __init__(self, model: str, task: str, hub: str, *args, **kwargs):
hub, model, *args, **kwargs
)

elif task == "political":
_ = kwargs.pop("user_prompt") if "user_prompt" in kwargs else kwargs
self.model_class = model_handler.PretrainedModelForPolitical(
hub, model, *args, **kwargs
)

elif task == "translation":
self.model_class = model_handler.PretrainedModelForTranslation(model)

Expand Down Expand Up @@ -244,6 +251,10 @@ def load_model(
model_class = modelhandler_module.PretrainedModelForClinicalTests.load_model(
hub, path, *args, **kwargs
)
elif task == "political":
model_class = modelhandler_module.PretrainedModelForPolitical.load_model(
hub, path, *args, **kwargs
)

elif task in ("disinformation-test"):
model_class = (
Expand Down
65 changes: 26 additions & 39 deletions langtest/modelhandler/transformers_modelhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,8 @@ def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
Returns:
'Pipeline':
"""

if "task" in kwargs.keys():
kwargs.pop("task")
return pipeline(model=path, **kwargs)

def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
Expand All @@ -392,55 +393,41 @@ def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
return self.predict(text=text, prompt=prompt, **kwargs)


class PretrainedModelForSummarization(_ModelHandler):
"""Transformers pretrained model for QA tasks
class PretrainedModelForSummarization(PretrainedModelForQA, _ModelHandler):
"""Transformers pretrained model for summarization tasks

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
"""

def __init__(self, hub, model, **kwargs):
"""Constructor method
pass

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
"""
assert isinstance(model, Pipeline), ValueError(
f"Invalid transformers pipeline! "
f"Pipeline should be '{Pipeline}', passed model is: '{type(model)}'"
)
self.model = model

@staticmethod
def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
"""Load the QA model into the `model` attribute.
class PretrainedModelForToxicity(PretrainedModelForQA, _ModelHandler):
"""Transformers pretrained model for summarization tasks

Args:
path (str):
path to model or model name
Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
"""

Returns:
'Pipeline':
"""
pass

return pipeline(model=path, **kwargs)

def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Perform predictions on the input text.
class PretrainedModelForSecurity(PretrainedModelForQA, _ModelHandler):
"""Transformers pretrained model for summarization tasks

Args:
text (str): Input text to perform QA on.
kwargs: Additional keyword arguments.
Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
"""

pass

Returns:
str: Output model for QA tasks
"""
prompt_template = PromptTemplate(**prompt)
p = prompt_template.format(**text)
prediction = self.model(p, **kwargs)
return prediction[0]["generated_text"][len(p) :]

def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Alias of the 'predict' method"""
return self.predict(text=text, prompt=prompt, **kwargs)
class PretrainedModelForPolitical(PretrainedModelForQA, _ModelHandler):
"""Transformers pretrained model for summarization tasks

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
"""

pass
74 changes: 73 additions & 1 deletion langtest/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .representation import BaseRepresentation
from .robustness import BaseRobustness
from .toxicity import BaseToxicity
from .political import BasePolitical
from .constants import (
A2B_DICT,
asian_names,
Expand Down Expand Up @@ -181,7 +182,12 @@ def run(samples_list: List[Sample], model_handler: ModelFactory, **kwargs):
if hasattr(each, "_result"):
results.extend(each._result)
elif isinstance(each, list):
results.extend(each)
for i in each:
if hasattr(i, "_result"):
results.extend(i._result)
else:
results.append(i)

return results

@classmethod
Expand Down Expand Up @@ -1508,3 +1514,69 @@ async def async_run(sample_list: List[Sample], model: ModelFactory, *args, **kwa
if progress:
progress.update(1)
return sample_list["disinfo"]


class PoliticalTestFactory(ITests):
"""Factory class for the clinical tests"""

alias_name = "political"
supported_tasks = ["question_answering", "summarization"]

def __init__(self, data_handler: List[Sample], tests: Dict = None, **kwargs) -> None:
"""Initializes the clinical tests"""

self.data_handler = data_handler
self.tests = tests
self.kwargs = kwargs
self.supported_tests = self.available_tests()

def transform(self) -> List[Sample]:
all_samples = []
for test_name, params in self.tests.items():
transformed_samples = self.supported_tests[test_name].transform(
self.data_handler, **self.kwargs
)
all_samples.extend(transformed_samples)
return all_samples

@classmethod
async def run(
cls, sample_list: List[Sample], model: ModelFactory, **kwargs
) -> List[Sample]:
"""Runs the model performance

Args:
sample_list (List[Sample]): The input data to be transformed.
model (ModelFactory): The model to be used for evaluation.
**kwargs: Additional arguments to be passed to the model performance

Returns:
List[Sample]: The transformed data based on the implemented model performance

"""
supported_tests = cls.available_tests()
tasks = []
for test_name, samples in sample_list.items():
out = await supported_tests[test_name].async_run(samples, model, **kwargs)
if isinstance(out, list):
tasks.extend(out)
else:
tasks.append(out)
return tasks

@staticmethod
def available_tests() -> Dict:
"""
Get a dictionary of all available tests, with their names as keys and their corresponding classes as values.

Returns:
Dict: A dictionary of test names and classes.

"""

tests = {
j: i
for i in BasePolitical.__subclasses__()
for j in (i.alias_name if isinstance(i.alias_name, list) else [i.alias_name])
}
return tests
Loading