79 changes: 40 additions & 39 deletions docs/pages/tutorials/tutorials.md

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions langtest/data/config/hf_hub_config.yml
@@ -0,0 +1,11 @@
# this section is only required for LLM models
model_parameters:
  max_new_tokens: 128

tests:
  defaults:
    min_pass_rate: 1.0

  robustness:
    lowercase:
      min_pass_rate: 0.70
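For context, a hedged sketch of how this config could be wired into a harness run. The `Harness` call follows langtest's documented pattern; treat the exact arguments as assumptions, since this diff does not show them:

```python
# Hedged sketch: wiring hf_hub_config.yml into a langtest Harness.
# The constructor arguments follow langtest's documented pattern and are
# an assumption here, not something this diff confirms.
from langtest import Harness

harness = Harness(
    task="question-answering",
    model={"model": "gpt2", "hub": "huggingface"},
    config="langtest/data/config/hf_hub_config.yml",
)
harness.generate().run().report()  # build perturbations, run tests, report pass rates
```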
68 changes: 68 additions & 0 deletions langtest/datahandler/datasource.py
@@ -1385,6 +1385,50 @@ def load_data_summarization(
samples = [self._row_to_sample_summarization(example) for example in dataset]
return samples

def load_data_qa(
self,
question_column: str,
context_column: str,
target_column: str,
split: str,
subset: str = None,
) -> List[Sample]:
"""Load the specified split from the dataset for QA task.

Args:
feature_column (str):
Name of the column containing the input text or document.
target_column (str):
Name of the column containing the target summary.
split (str):
Name of the split to load (e.g., train, validation, test).
subset (str):
Name of the configuration or subset to load.

Returns:
List[Sample]:
Loaded split as a list of Sample objects for QA task.
"""
question_column = "question" if question_column is None else question_column
target_column = "answer" if target_column is None else target_column
split = "test" if split is None else split

if subset:
dataset = self.load_dataset(self.dataset_name, name=subset, split=split)
else:
dataset = self.load_dataset(self.dataset_name, split=split)

dataset = dataset.map(
lambda example: {
"question": example[question_column],
"context": example[context_column],
"answer": example[target_column],
}
)

samples = [self._row_to_sample_qa(example) for example in dataset]
return samples
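A hedged usage sketch of the new loader. The enclosing `HuggingFaceDataset` class and its constructor signature are inferred from context, and the dataset and column names are hypothetical:

```python
# Hedged sketch: loading a QA split through load_data_qa.
# `HuggingFaceDataset` and its constructor are assumptions drawn from
# context; the dataset name and column names are hypothetical.
from langtest.datahandler.datasource import HuggingFaceDataset

dataset = HuggingFaceDataset(dataset_name="my-org/my-qa-dataset")
samples = dataset.load_data_qa(
    question_column="question",
    context_column="context",
    target_column="answer",
    split="test",
)
print(samples[0].original_question, samples[0].original_context)
```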

def load_raw_data(
self,
split: str = "test",
@@ -1455,6 +1499,30 @@ def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample:

return SummarizationSample(original=original, expected_results=summary)

@staticmethod
def _row_to_sample_qa(data_row: Dict[str, str]) -> Sample:
"""Convert a row from the dataset into a Sample for summarization.

Args:
data_row (Dict[str, str]):
Single row of the dataset.

Returns:
Sample:
Row formatted into a Sample object for summarization.
"""
context = data_row.get("context", "")
question = data_row.get("question", "")
answer = data_row.get("answer", "")
if isinstance(answer, str):
answer = [answer]

return QASample(
original_question=question,
original_context=context,
            expected_results=answer,
)

def export_data(self, data: List[Sample], output_path: str):
"""Exports the data to the corresponding format and saves it to 'output_path'.

Expand Down
112 changes: 111 additions & 1 deletion langtest/modelhandler/transformers_modelhandler.py
@@ -1,4 +1,4 @@
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Union

import numpy as np
from transformers import Pipeline, pipeline
@@ -11,6 +11,8 @@
TranslationOutput,
)

from langchain import PromptTemplate


class PretrainedModelForNER(_ModelHandler):
"""Transformers pretrained model for NER tasks
@@ -334,3 +336,111 @@ def predict(self, text: str, **kwargs) -> TranslationOutput:
def __call__(self, text: str, *args, **kwargs) -> TranslationOutput:
"""Alias of the 'predict' method"""
return self.predict(text=text, **kwargs)


class PretrainedModelForQA(_ModelHandler):
"""Transformers pretrained model for QA tasks

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
"""

def __init__(self, hub, model, **kwargs):
"""Constructor method

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
"""
assert isinstance(model, Pipeline), ValueError(
f"Invalid transformers pipeline! "
f"Pipeline should be '{Pipeline}', passed model is: '{type(model)}'"
)
self.model = model

@staticmethod
def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
"""Load the QA model into the `model` attribute.

Args:
path (str):
path to model or model name

Returns:
'Pipeline':
"""

return pipeline(model=path, **kwargs)

def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Perform predictions on the input text.

Args:
text (str): Input text to perform QA on.
kwargs: Additional keyword arguments.


Returns:
str: Output model for QA tasks
"""
prompt_template = PromptTemplate(**prompt)
p = prompt_template.format(**text)
prediction = self.model(p, **kwargs)
return prediction[0]["generated_text"][len(p) :]

def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Alias of the 'predict' method"""
return self.predict(text=text, prompt=prompt, **kwargs)
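A hedged sketch of how the new handler consumes the `text`/`prompt` pair; the model name, template, and generation arguments below are illustrative, not part of this diff:

```python
# Hedged sketch: exercising PretrainedModelForQA end to end.
# "gpt2", the template, and max_new_tokens are illustrative choices.
pipe = PretrainedModelForQA.load_model(hub="huggingface", path="gpt2")
qa_model = PretrainedModelForQA(hub="huggingface", model=pipe)

answer = qa_model(
    text={
        "context": "Hamlet is a tragedy written by William Shakespeare.",
        "question": "Who wrote Hamlet?",
    },
    prompt={
        "input_variables": ["context", "question"],
        "template": "Context: {context}\nQuestion: {question}\nAnswer:",
    },
    max_new_tokens=32,  # forwarded to the underlying pipeline call
)
print(answer)  # generated continuation with the prompt prefix stripped
```

Because `predict` strips the formatted prompt from the front of the generated text, the handler targets plain text-generation pipelines rather than extractive QA pipelines.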


class PretrainedModelForSummarization(_ModelHandler):
"""Transformers pretrained model for QA tasks

Args:
model (transformers.pipeline.Pipeline): Pretrained HuggingFace QA pipeline for predictions.
"""

def __init__(self, hub, model, **kwargs):
"""Constructor method

Args:
            model (transformers.pipeline.Pipeline): Pretrained HuggingFace summarization pipeline for predictions.
"""
assert isinstance(model, Pipeline), ValueError(
f"Invalid transformers pipeline! "
f"Pipeline should be '{Pipeline}', passed model is: '{type(model)}'"
)
self.model = model

@staticmethod
def load_model(hub: str, path: str, **kwargs) -> "Pipeline":
"""Load the QA model into the `model` attribute.

Args:
path (str):
path to model or model name

Returns:
'Pipeline':
"""

return pipeline(model=path, **kwargs)

def predict(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Perform predictions on the input text.

Args:
text (str): Input text to perform QA on.
kwargs: Additional keyword arguments.


Returns:
str: Output model for QA tasks
"""
prompt_template = PromptTemplate(**prompt)
p = prompt_template.format(**text)
prediction = self.model(p, **kwargs)
return prediction[0]["generated_text"][len(p) :]

def __call__(self, text: Union[str, dict], prompt: dict, **kwargs) -> str:
"""Alias of the 'predict' method"""
return self.predict(text=text, prompt=prompt, **kwargs)
1 change: 0 additions & 1 deletion langtest/transform/__init__.py
@@ -305,7 +305,6 @@ def __init__(self, data_handler: List[Sample], tests: Dict = None, **kwargs) ->
tests Optional[Dict]:
A dictionary of test names and corresponding parameters (default is None).
"""

self.supported_tests = self.available_tests()
self._data_handler = data_handler
self.tests = tests
80 changes: 50 additions & 30 deletions langtest/utils/custom_types/sample.py
@@ -495,38 +495,58 @@ def is_pass(self) -> bool:
from ...transform.constants import qa_prompt_template
from langchain.prompts import PromptTemplate

        if "llm" in str(type(llm_model.model_class)):
            if self.dataset_name not in ["BoolQ", "TruthfulQA", "Quac", "BBQ"]:
                PROMPT = PromptTemplate(
                    input_variables=["query", "answer", "result"],
                    template=qa_prompt_template,
                )
                eval_chain = QAEvalChain.from_llm(
                    llm=llm_model.model_class.model, prompt=PROMPT
                )
                inputs = [
                    {"question": self.original_question, "answer": self.expected_results}
                ]

                predictions = [
                    {"question": self.perturbed_question, "text": self.actual_results}
                ]

                graded_outputs = eval_chain.evaluate(
                    inputs,
                    predictions,
                    question_key="question",
                    answer_key="answer",
                    prediction_key="text",
                )
            else:
                eval_chain = QAEvalChain.from_llm(llm=llm_model.model_class.model)
                graded_outputs = eval_chain.evaluate(
                    [
                        {
                            "question": self.original_question,
                            "answer": self.expected_results,
                        }
                    ],
                    [{"question": self.perturbed_question, "text": self.actual_results}],
                    question_key="question",
                    prediction_key="text",
                )

            return graded_outputs[0]["text"].strip() == "CORRECT"
        else:
            prediction = llm_model(
                text={
                    "query": self.perturbed_question,
                    "answer": self.expected_results,
                    "result": self.actual_results,
                },
                prompt={
                    "input_variables": ["query", "answer", "result"],
                    "template": qa_prompt_template,
                },
            )

            return prediction == "CORRECT"
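In the non-LLM branch, grading is delegated to the model handler itself: `qa_prompt_template` is filled with the perturbed question, expected answer, and actual result, and the pipeline is expected to emit a CORRECT/INCORRECT verdict. A minimal sketch of the equivalent call with illustrative values (`qa_model` is a `PretrainedModelForQA` handler as above):

```python
# Hedged sketch: the grading call made for transformer pipelines.
# Values are illustrative; qa_prompt_template comes from
# langtest.transform.constants and is assumed to request a
# CORRECT / INCORRECT verdict.
verdict = qa_model(
    text={
        "query": "What is the capital of France?",
        "answer": "Paris",
        "result": "Paris",
    },
    prompt={
        "input_variables": ["query", "answer", "result"],
        "template": qa_prompt_template,
    },
)
passed = verdict == "CORRECT"
```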


class MinScoreQASample(QASample):
14 changes: 13 additions & 1 deletion tests/test_huggingface_model.py
@@ -27,7 +27,7 @@ def setUp(self) -> None:

self.tasks = ["ner", "text-classifier"]

def test_transformers_models(self):
def test_transformers_ner_models(self):
"""
        Test loading a Hugging Face NER model.

@@ -39,6 +39,18 @@ def test_transformers_models(self):
)
self.assertIsInstance(model, ModelFactory)

def test_transformers_QA_models(self):
"""
        Test loading a Hugging Face question-answering model.

        This method tests the loading of a Hugging Face QA model using the `ModelFactory` class.
        It asserts that the loaded model is an instance of `ModelFactory`.
"""
model = ModelFactory.load_model(
task="question-answering", hub="huggingface", path="gpt2"
)
self.assertIsInstance(model, ModelFactory)

def test_unsupported_task(self):
"""
Test unsupported task.