From 5b76e46df14fb661355f3a05d1eac0a4ca6f48c3 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 20 Jul 2023 12:16:33 +0530 Subject: [PATCH 01/24] custom-column-names for text-classification and summarization --- langtest/datahandler/datasource.py | 215 +++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 084f1f32f..ec3583c2d 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -876,3 +876,218 @@ def _row_to_sample_classification(self, data_row: Dict[str, str]) -> Sample: original=original, expected_results=SequenceClassificationOutput(predictions=[label]), ) + + +class CustomCSVDataset(_IDataset): + """ + A class to handle CSV files datasets. Subclass of _IDataset. + + Attributes: + _file_path (str): + The path to the data file. + task (str): + Specifies the task of the dataset, which can be either "text-classification" + or "summarization". + delimiter (str): + The delimiter used in the CSV file to separate columns. + """ + def __init__(self, file_path: str, task: str, **kwargs) -> None: + """ + Initializes a CustomCSVDataset object. + + Args: + file_path (str): + The path to the data file containing the CSV data. + task (str): + Specifies the task of the dataset, which can be either "text-classification" + or "summarization". + **kwargs: + Additional keyword arguments that can be used to configure the dataset (optional). + """ + super().__init__() + self._file_path = file_path + self.task = task + self.delimiter = self._find_delimiter(file_path) + + def load_data_classification( + self, + dataset: pd.DataFrame, + feature_column: str = "text", + target_column: str = "label", + ) -> List[Sample]: + """ + Load the specified split from the dataset library for classification task. + + Args: + dataset (pd.DataFrame): + The input dataset containing the text data and corresponding labels. 
+ feature_column (str, optional): + Name of the column in the dataset containing the input text data. + Default is "text". + target_column (str, optional): + Name of the column in the dataset containing the target labels for classification. + Default is "label". + + Returns: + List[Sample]: + Loaded split as a list of Sample objects, where each Sample object consists + of an input text and its corresponding label. + """ + + if feature_column and target_column: + dataset.rename(columns={feature_column: "text", target_column: "label"}, inplace=True) + + + samples = [self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows()] + return samples + + + + def load_data_summarization( + self, + dataset: pd.DataFrame, + feature_column: str = "document", + target_column: str = "summary", + ) -> List[Sample]: + """ + Load the specified split from the dataset library for summarization task. + + Args: + dataset (pd.DataFrame): + The input dataset containing the document data and corresponding summaries. + feature_column (str, optional): + Name of the column in the dataset containing the input document data. + Default is "document". + target_column (str, optional): + Name of the column in the dataset containing the target summaries for summarization. + Default is "summary". + + Returns: + List[Sample]: + Loaded split as a list of Sample objects for summarization task, where each + Sample object contains a document and its corresponding summary. + """ + + if feature_column and target_column: + dataset.rename(columns={feature_column: "document", target_column: "summary"},inplace=True) + samples = [self._row_to_sample_summarization(row) for _, row in dataset.iterrows()] + return samples + + + + def load_data( + self, + feature_column: str, + target_column: str + ) -> List[Sample]: + """ + Load the specified split from the dataset library based on the task. 
+ + Args: + feature_column (str): + Name of the column in the dataset containing the input text data for classification + or the input document data for summarization. + target_column (str): + Name of the column in the dataset containing the target labels for classification + or the target summaries for summarization. + + Returns: + List[Sample]: + Loaded split as a list of Sample objects based on the specified task. For text + classification task, each Sample object consists of an input text and its + corresponding label. For summarization task, each Sample object contains a + document and its corresponding summary. + + Raises: + ValueError: + If the specified task is not supported or recognized. Currently supported tasks + include "text-classification" and "summarization". + """ + dataset = pd.read_csv(self._file_path, delimiter=self.delimiter) + + if self.task == "text-classification": + return self.load_data_classification(dataset, + feature_column, target_column, + ) + elif self.task == "summarization": + return self.load_data_summarization(dataset, + feature_column, target_column, + ) + else: + raise ValueError(f"Unsupported task: {self.task}") + + + def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: + """ + Convert a row from the dataset into a Sample for the text-classification task + + Args: + row (pd.Series): + Single row of the dataset as a Pandas Series + + Returns: + Sample: + Row formatted into a Sample object + """ + original = row.loc["text"] + label = SequenceLabel(label=row.loc["label"], score=1) + + return SequenceClassificationSample( + original=original, + expected_results=SequenceClassificationOutput(predictions=[label]), + ) + + def _row_to_sample_summarization(self,row: pd.Series) -> Sample: + """ + Convert a row from the dataset into a Sample for summarization. + + Args: + data_row (Dict[str, str]): + Single row of the dataset. + + Returns: + Sample: + Row formatted into a Sample object for summarization. 
+ """ + original = row.loc["document"] + summary = row.loc["summary"] + + return SummarizationSample( + original=original, expected_results=summary, task="summarization" + ) + + def export_data(self, data: List[Sample], output_path: str): + """ + Exports the data to the corresponding format and saves it to 'output_path'. + + Args: + data (List[Sample]): + Data to export. + output_path (str): + Path to save the data to. + """ + rows = [] + for s in data: + row = Formatter.process(s, output_format="csv") + rows.append(row) + + df = pd.DataFrame( + rows, columns=list(self.COLUMN_NAMES["text-classification"].keys()) + ) + df.to_csv(output_path, index=False, encoding="utf-8") + + @staticmethod + def _find_delimiter(file_path: str) -> property: + """ + Helper function in charge of finding the delimiter character in a csv file. + Args: + file_path (str): + location of the csv file to load + Returns: + property: + """ + sniffer = csv.Sniffer() + with open(file_path, encoding="utf-8") as fp: + first_line = fp.readline() + delimiter = sniffer.sniff(first_line).delimiter + return delimiter From 2f1db05d048fa773f03f267ad8d5fe9d30b09cee Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 20 Jul 2023 12:17:21 +0530 Subject: [PATCH 02/24] updated langtest.py for custom-column-names for csv --- langtest/langtest.py | 69 ++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index cf57e482c..bef7970d9 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -11,7 +11,7 @@ from langtest.utils.custom_types.sample import RuntimeSample from .augmentation import AugmentRobustness, TemplaticAugment -from .datahandler.datasource import DataFactory, HuggingFaceDataset +from .datahandler.datasource import DataFactory, HuggingFaceDataset,CustomCSVDataset from .modelhandler import LANGCHAIN_HUBS, ModelFactory from .transform import TestFactory from .transform.utils import 
RepresentationOperation @@ -145,35 +145,60 @@ def __init__( and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION and task == "text-classification" ): - self.data = ( - HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "text"), - target_column=data.get("target_column", "label"), - split=data.get("split", "test"), - subset=data.get("subset", None), + if not data["name"].endswith(".csv"): + self.data = ( + + HuggingFaceDataset(data["name"], task=task).load_data( + feature_column=data.get("feature_column", "text"), + target_column=data.get("target_column", "label"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) + if data is not None + else None ) - if data is not None - else None - ) - if hub == "spacy" and (model == "textcat_imdb" or model is None): - if model is None: - logging.warning( - "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." + if hub == "spacy" and (model == "textcat_imdb" or model is None): + if model is None: + logging.warning( + "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." + ) + model = resource_filename("langtest", "data/textcat_imdb") + elif data["name"].endswith(".csv"): + self.data = ( + CustomCSVDataset(data["name"], task=task).load_data( + feature_column=data.get("feature_column", "text"), + target_column=data.get("target_column", "label"), ) - model = resource_filename("langtest", "data/textcat_imdb") + if data is not None + else None + ) + + if hub == "spacy" and (model == "textcat_imdb" or model is None): + if model is None: + logging.warning( + "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." 
+ ) + model = resource_filename("langtest", "data/textcat_imdb") elif ( - type(data) is dict + + type(data) is dict and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" ): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "document"), - target_column=data.get("target_column", "summary"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) + if not data["name"].endswith(".csv"): + self.data = HuggingFaceDataset(data["name"], task=task).load_data( + feature_column=data.get("feature_column", "document"), + target_column=data.get("target_column", "summary"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) + elif data["name"].endswith(".csv"): + self.data = CustomCSVDataset(data["name"], task=task).load_data( + feature_column=data.get("feature_column", "document"), + target_column=data.get("target_column", "summary"), + ) elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys(): raise ValueError( From ad7da526c60555a9c9873da77a2022dce86c74f6 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 20 Jul 2023 12:32:04 +0530 Subject: [PATCH 03/24] Format: datasource.py and langtest.py --- langtest/datahandler/datasource.py | 58 ++++++++++++++++-------------- langtest/langtest.py | 8 ++--- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index ec3583c2d..efb77dbd2 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -890,7 +890,8 @@ class CustomCSVDataset(_IDataset): or "summarization". delimiter (str): The delimiter used in the CSV file to separate columns. - """ + """ + def __init__(self, file_path: str, task: str, **kwargs) -> None: """ Initializes a CustomCSVDataset object. 
@@ -922,7 +923,7 @@ def load_data_classification( dataset (pd.DataFrame): The input dataset containing the text data and corresponding labels. feature_column (str, optional): - Name of the column in the dataset containing the input text data. + Name of the column in the dataset containing the input text data. Default is "text". target_column (str, optional): Name of the column in the dataset containing the target labels for classification. @@ -933,16 +934,17 @@ def load_data_classification( Loaded split as a list of Sample objects, where each Sample object consists of an input text and its corresponding label. """ - + if feature_column and target_column: - dataset.rename(columns={feature_column: "text", target_column: "label"}, inplace=True) - + dataset.rename( + columns={feature_column: "text", target_column: "label"}, inplace=True + ) - samples = [self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows()] + samples = [ + self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows() + ] return samples - - def load_data_summarization( self, dataset: pd.DataFrame, @@ -969,17 +971,16 @@ def load_data_summarization( """ if feature_column and target_column: - dataset.rename(columns={feature_column: "document", target_column: "summary"},inplace=True) - samples = [self._row_to_sample_summarization(row) for _, row in dataset.iterrows()] + dataset.rename( + columns={feature_column: "document", target_column: "summary"}, + inplace=True, + ) + samples = [ + self._row_to_sample_summarization(row) for _, row in dataset.iterrows() + ] return samples - - - def load_data( - self, - feature_column: str, - target_column: str - ) -> List[Sample]: + def load_data(self, feature_column: str, target_column: str) -> List[Sample]: """ Load the specified split from the dataset library based on the task. @@ -997,7 +998,7 @@ def load_data( classification task, each Sample object consists of an input text and its corresponding label. 
For summarization task, each Sample object contains a document and its corresponding summary. - + Raises: ValueError: If the specified task is not supported or recognized. Currently supported tasks @@ -1006,17 +1007,20 @@ def load_data( dataset = pd.read_csv(self._file_path, delimiter=self.delimiter) if self.task == "text-classification": - return self.load_data_classification(dataset, - feature_column, target_column, + return self.load_data_classification( + dataset, + feature_column, + target_column, ) elif self.task == "summarization": - return self.load_data_summarization(dataset, - feature_column, target_column, + return self.load_data_summarization( + dataset, + feature_column, + target_column, ) else: raise ValueError(f"Unsupported task: {self.task}") - - + def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: """ Convert a row from the dataset into a Sample for the text-classification task @@ -1037,7 +1041,7 @@ def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: expected_results=SequenceClassificationOutput(predictions=[label]), ) - def _row_to_sample_summarization(self,row: pd.Series) -> Sample: + def _row_to_sample_summarization(self, row: pd.Series) -> Sample: """ Convert a row from the dataset into a Sample for summarization. @@ -1050,12 +1054,12 @@ def _row_to_sample_summarization(self,row: pd.Series) -> Sample: Row formatted into a Sample object for summarization. """ original = row.loc["document"] - summary = row.loc["summary"] + summary = row.loc["summary"] return SummarizationSample( original=original, expected_results=summary, task="summarization" ) - + def export_data(self, data: List[Sample], output_path: str): """ Exports the data to the corresponding format and saves it to 'output_path'. 
diff --git a/langtest/langtest.py b/langtest/langtest.py index bef7970d9..ba7a4da05 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -11,7 +11,7 @@ from langtest.utils.custom_types.sample import RuntimeSample from .augmentation import AugmentRobustness, TemplaticAugment -from .datahandler.datasource import DataFactory, HuggingFaceDataset,CustomCSVDataset +from .datahandler.datasource import DataFactory, HuggingFaceDataset, CustomCSVDataset from .modelhandler import LANGCHAIN_HUBS, ModelFactory from .transform import TestFactory from .transform.utils import RepresentationOperation @@ -147,7 +147,6 @@ def __init__( ): if not data["name"].endswith(".csv"): self.data = ( - HuggingFaceDataset(data["name"], task=task).load_data( feature_column=data.get("feature_column", "text"), target_column=data.get("target_column", "label"), @@ -182,8 +181,7 @@ def __init__( model = resource_filename("langtest", "data/textcat_imdb") elif ( - - type(data) is dict + type(data) is dict and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" ): @@ -195,7 +193,7 @@ def __init__( subset=data.get("subset", None), ) elif data["name"].endswith(".csv"): - self.data = CustomCSVDataset(data["name"], task=task).load_data( + self.data = CustomCSVDataset(data["name"], task=task).load_data( feature_column=data.get("feature_column", "document"), target_column=data.get("target_column", "summary"), ) From d62598ea54708fa2b12bcb581937415531a6ad46 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 14:21:46 +0530 Subject: [PATCH 04/24] added default_question_answering_prompt --- langtest/utils/custom_types/helpers.py | 1 + langtest/utils/custom_types/sample.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/langtest/utils/custom_types/helpers.py b/langtest/utils/custom_types/helpers.py index eaf6a3e04..ebd361ee2 100644 --- a/langtest/utils/custom_types/helpers.py +++ b/langtest/utils/custom_types/helpers.py @@ -12,6 +12,7 @@ 
"hellaswag": "You are an AI agent that completes sentences and cannot do anything else. You do not repeat the sentence and only continue for one sentence. Complete the following sentence: \n{context}{question}", "default_summarization_prompt": "You are an intelligent Context summarizer. Please read the following context carefully. After understanding its content, create a concise summary, capturing the essential themes and key details. Please ensure that the summary does not end abruptly and remains within the max_tokens word limit. Context: {context}\n\n Summary: ", "bbq": "Read the following context carefully and provide a concise answer based solely on the information given. Strictly, do not introduce any new information or make any assumptions. \n\nContext: {context}\nQuestion: {question}\n", + "default_question_answering_prompt": "You are an intelligent bot and it is your responsibility to make sure to give a concise answer. Context: {context}\n Question: {question}\n Answer:", } diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index 2cd28d885..683289439 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -421,7 +421,11 @@ def transform(self, func, params, prob, perturbations=None, **kwargs): self.category = func.__module__.split(".")[-1] def run(self, model, **kwargs): - dataset_name = self.dataset_name.split("-")[0].lower() + dataset_name = ( + self.dataset_name.split("-")[0].lower() + if self.dataset_name + else "default_question_answering_prompt" + ) prompt_template = kwargs.get( "user_prompt", default_user_prompt.get(dataset_name, "") ) From 902795b4c9fe0abea337592397a795152252e3c2 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 14:22:08 +0530 Subject: [PATCH 05/24] added support for question answering for csv dataset --- langtest/datahandler/datasource.py | 70 ++++++++++++++++++++++++++++++ langtest/langtest.py | 13 ++++++ 2 files changed, 83 
insertions(+) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index efb77dbd2..c806639cf 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -980,6 +980,70 @@ def load_data_summarization( ] return samples + def load_data_question_answering( + self, + dataset: pd.DataFrame, + feature_column: dict = {"passage": "passage", "question": "question"}, + target_column: str = "answer", + ) -> List[Sample]: + """ + Load the specified split from the dataset library for question-answering task. + + Args: + dataset (pd.DataFrame): + The input dataset containing the passage, question, and corresponding answers. + feature_column (dict, optional): + Dictionary of column names in the dataset containing the input passage and question data. + Default is {"passage": "passage", "question": "question"}. + target_column (str, optional): + Name of the column in the dataset containing the target answers for question-answering. + Default is "answer". + + Returns: + List[QASample]: + Loaded split as a list of QASample objects for question-answering task, where each + QASample object contains an original question, original context (passage), and the task name. + """ + passage_column = feature_column.get("passage") + question_column = feature_column.get("question") + + if feature_column and target_column: + if passage_column in dataset.columns: + dataset.rename(columns={passage_column: "passage"}, inplace=True) + else: + dataset["passage"] = "-" + + if question_column in dataset.columns: + dataset.rename(columns={question_column: "question"}, inplace=True) + + dataset.rename(columns={target_column: "answer"}, inplace=True) + + samples = [ + self._row_to_sample_question_answering(row) for _, row in dataset.iterrows() + ] + return samples + + def _row_to_sample_question_answering(self, row: pd.Series) -> QASample: + """ + Convert a row from the dataset into a QASample for question-answering. 
+ + Args: + row (pd.Series): + Single row of the dataset. + + Returns: + QASample: + Row formatted into a QASample object for question-answering. + """ + question = row.loc["question"] + passage = row.loc["passage"] + + return QASample( + original_question=question, + original_context=passage, + task="question-answering", + ) + def load_data(self, feature_column: str, target_column: str) -> List[Sample]: """ Load the specified split from the dataset library based on the task. @@ -1018,6 +1082,12 @@ def load_data(self, feature_column: str, target_column: str) -> List[Sample]: feature_column, target_column, ) + elif self.task == "question-answering": + return self.load_data_question_answering( + dataset, + feature_column, + target_column, + ) else: raise ValueError(f"Unsupported task: {self.task}") diff --git a/langtest/langtest.py b/langtest/langtest.py index ba7a4da05..83ad9ade1 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -198,6 +198,19 @@ def __init__( target_column=data.get("target_column", "summary"), ) + elif ( + type(data) is dict + and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION + and task == "question-answering" + ): + if data["name"].endswith(".csv"): + self.data = CustomCSVDataset(data["name"], task=task).load_data( + feature_column=data.get( + "feature_column", {"passage": "passage", "question": "question"} + ), + target_column=data.get("target_column", "answer"), + ) + elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys(): raise ValueError( "You haven't specified any value for the parameter 'data' and the configuration you " From 3382635c240846bf9c47fd19c9ae314397634eb4 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 14:40:37 +0530 Subject: [PATCH 06/24] Test(test/test_harness.py): added test --- tests/test_harness.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_harness.py b/tests/test_harness.py index 34bc9c134..c8eee0517 100644 --- 
a/tests/test_harness.py +++ b/tests/test_harness.py @@ -222,6 +222,33 @@ def test_harness_edit_import_testcases(self): # test working of the harness harness.run().report() + def test_text_classification_csv_custom_columns(self): + """Test loading CSV data with custom column names for text classification.""" + save_dir = "/tmp/saved_HF_data_text_classification_harness_test" + tc_harness = Harness( + task="text-classification", + hub="huggingface", + data={ + "name": "tests/fixtures/text_classification.csv", + "feature_column": "text", + "target_column": "label", + }, + model="aychang/roberta-base-imdb", + ) + tc_harness.data = tc_harness.data[:10] + tc_harness.generate() + tc_harness.save(save_dir) + + loaded_tc_harness = Harness.load( + save_dir=save_dir, + task="text-classification", + model="aychang/roberta-base-imdb", + hub="huggingface", + ) + self.assertEqual(tc_harness._config, loaded_tc_harness._config) + self.assertEqual(tc_harness.data, loaded_tc_harness.data) + self.assertNotEqual(tc_harness.model, loaded_tc_harness.model) + class DefaultCodeBlocksTestCase(unittest.TestCase): """ From b459bbcdea7b8262400fedcfbbbd89aa5306bb8b Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 15:43:00 +0530 Subject: [PATCH 07/24] updated test --- tests/test_harness.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index c8eee0517..a25a1811a 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -228,12 +228,13 @@ def test_text_classification_csv_custom_columns(self): tc_harness = Harness( task="text-classification", hub="huggingface", + hub="huggingface", + model="lvwerra/distilbert-imdb", data={ "name": "tests/fixtures/text_classification.csv", "feature_column": "text", "target_column": "label", }, - model="aychang/roberta-base-imdb", ) tc_harness.data = tc_harness.data[:10] tc_harness.generate() @@ -242,7 +243,7 @@ def test_text_classification_csv_custom_columns(self): 
loaded_tc_harness = Harness.load( save_dir=save_dir, task="text-classification", - model="aychang/roberta-base-imdb", + model="lvwerra/distilbert-imdb", hub="huggingface", ) self.assertEqual(tc_harness._config, loaded_tc_harness._config) From ea9e60702a4f5d2c66d3afebc8a8073d1844efd3 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 15:45:07 +0530 Subject: [PATCH 08/24] minor change --- tests/test_harness.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index a25a1811a..4a1faf932 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -228,7 +228,6 @@ def test_text_classification_csv_custom_columns(self): tc_harness = Harness( task="text-classification", hub="huggingface", - hub="huggingface", model="lvwerra/distilbert-imdb", data={ "name": "tests/fixtures/text_classification.csv", From 9c8a6b359c69ca173fc47600504b00aa04f15f05 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 21 Jul 2023 18:41:34 +0530 Subject: [PATCH 09/24] chore(datasource): add load raw method for CustomCSVDataset --- langtest/datahandler/datasource.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index c744353f3..9107edd5d 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -966,8 +966,6 @@ def load_data_summarization( def load_raw_data( self, - feature_column: str = "text", - target_column: str = "label", split: str = "test", subset: str = None, ) -> List: @@ -1256,6 +1254,15 @@ def _row_to_sample_question_answering(self, row: pd.Series) -> QASample: task="question-answering", ) + def load_raw_data(self) -> List[Dict]: + """Loads data from a csv file into raw lists of strings + Returns: + List[Dict]: + parsed CSV file into list of dicts + """ + df = pd.read_csv(self._file_path) + return df.to_dict(orient="records") + def load_data(self, feature_column: str, target_column: 
str) -> List[Sample]: """ Load the specified split from the dataset library based on the task. From f45509fad9a91de3fdf4e1f734d2b8ddb91aa04a Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 28 Jul 2023 11:42:48 +0530 Subject: [PATCH 10/24] updated test for HuggingFaceDataset --- tests/test_datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasource.py b/tests/test_datasource.py index 3083a8db3..332b97b77 100644 --- a/tests/test_datasource.py +++ b/tests/test_datasource.py @@ -208,7 +208,7 @@ def test_load_raw_data(self, dataset, feature_col, target_col): """""" if isinstance(dataset, HuggingFaceDataset): raw_data = dataset.load_raw_data( - feature_column=feature_col, target_column=target_col, split="test[:30]" + split="test[:30]" ) else: raw_data = dataset.load_raw_data() From 108f6b02594e0ca6ebc3f14c479420139398c39f Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 28 Jul 2023 12:15:15 +0530 Subject: [PATCH 11/24] updated CSVDataset for custom column names --- langtest/datahandler/datasource.py | 673 ++++++++++++----------------- langtest/langtest.py | 80 +--- 2 files changed, 293 insertions(+), 460 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 9107edd5d..edc94c99c 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -9,7 +9,6 @@ import jsonlines import pandas as pd -from langtest.utils.custom_types import sample from langtest.utils.custom_types.sample import ToxicitySample, TranslationSample from .format import Formatter from ..utils.custom_types import ( @@ -99,7 +98,11 @@ def __init__(self, file_path: str, task: str, **kwargs) -> None: file_path (str): Path to the dataset. task (str): Task to be evaluated. 
""" - self._file_path = file_path + self._custom_label = file_path + if isinstance(self._custom_label, dict): + self._file_path = file_path["name"] + else: + self._file_path = file_path self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls for cls in _IDataset.__subclasses__() @@ -127,9 +130,14 @@ def load(self) -> List[Sample]: Returns: list[Sample]: Loaded text data. """ - self.init_cls = self._class_map[self.file_ext.replace(".", "")]( - self._file_path, task=self.task, **self.kwargs - ) + if isinstance(self._custom_label, dict): + self.init_cls = self._class_map[self.file_ext.replace(".", "")]( + self._custom_label, task=self.task, **self.kwargs + ) + else: + self.init_cls = self._class_map[self.file_ext.replace(".", "")]( + self._file_path, task=self.task, **self.kwargs + ) return self.init_cls.load_data() def export(self, data: List[Sample], output_path: str): @@ -419,300 +427,6 @@ def export_data(self, data: List[Sample], output_path: str): raise NotImplementedError() -class CSVDataset(_IDataset): - """Class to handle CSV files dataset. Subclass of _IDataset.""" - - supported_tasks = ["ner", "text-classification"] - COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} - - def __init__(self, file_path: str, task: str, **kwargs) -> None: - """Initializes CSVDataset object. - - Args: - file_path (str): - Path to the data file. - task (str): - name of the task to perform - """ - super().__init__() - self._file_path = file_path - self.task = task - self.delimiter = self._find_delimiter(file_path) - - if task in self.COLUMN_NAMES: - self.COLUMN_NAMES = self.COLUMN_NAMES[self.task] - elif "is_import" not in kwargs: - raise ValueError( - f"Given task ({task}) is not matched with template. \ - CSV dataset can ne only loaded for text-classification and ner!" 
- ) - self.column_map = None - self.kwargs = kwargs - - def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: - """Loads data from a csv file into raw lists of strings - - Args: - standardize_columns (bool): whether to standardize column names - - Returns: - List[Dict]: - parsed CSV file into list of dicts - """ - df = pd.read_csv(self._file_path) - - raw_data = [] - if not standardize_columns: - data = df.to_dict(orient="records") - if self.task == "ner": - for row in data: - raw_data.append( - { - key: (val if isinstance(val, list) else eval(val)) - for key, val in row.items() - } - ) - return raw_data - return data - - for _, row in df.iterrows(): - if not self.column_map: - self.column_map = self._match_column_names(list(row.keys())) - - label_col = ( - self.column_map["ner"] if self.task == "ner" else self.column_map["label"] - ) - - text = row[self.column_map["text"]] - labels = row[label_col] - - raw_data.append( - { - "text": text - if (isinstance(text, list) or self.task != "ner") - else eval(text), - "labels": labels - if (isinstance(labels, list) or self.task != "ner") - else eval(labels), - } - ) - - return raw_data - - def load_data(self) -> List[Sample]: - """Loads data from a csv file. - - Returns: - List[Sample]: List of formatted sentences from the dataset. 
- """ - if self.kwargs.get("is_import", False): - kwargs = self.kwargs.copy() - kwargs.pop("is_import") - return self._import_data(self._file_path, **kwargs) - - df = pd.read_csv(self._file_path) - - if not self.column_map: - self.column_map = self._match_column_names(list(df.columns)) - - samples = [] - for row_index, row in df.iterrows(): - if self.task == "ner": - samples.append(self._row_to_ner_sample(row.to_dict(), row_index)) - - elif self.task == "text-classification": - samples.append(self._row_to_seq_classification_sample(row.to_dict())) - - return samples - - def export_data(self, data: List[Sample], output_path: str): - """Exports the data to the corresponding format and saves it to 'output_path'. - - Args: - data (List[Sample]): - data to export - output_path (str): - path to save the data to - """ - if self.task == "ner": - final_data = defaultdict(list) - for elt in data: - tokens, labels, testcase_tokens, testcase_labels = Formatter.process( - elt, output_format="csv" - ) - final_data["text"].append(tokens) - final_data["ner"].append(labels) - final_data["testcase_text"].append(testcase_tokens) - final_data["testcase_labels"].append(testcase_labels) - - if ( - sum([len(labels) for labels in final_data["testcase_labels"]]) - * sum([len(tokens) for tokens in final_data["testcase_text"]]) - == 0 - ): - final_data.pop("testcase_text") - final_data.pop("testcase_labels") - - pd.DataFrame(data=final_data).to_csv(output_path, index=False) - - elif self.task == "text-classification": - rows = [] - for s in data: - row = Formatter.process(s, output_format="csv") - rows.append(row) - - df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES.keys())) - df.to_csv(output_path, index=False, encoding="utf-8") - - @staticmethod - def _find_delimiter(file_path: str) -> property: - """Helper function in charge of finding the delimiter character in a csv file. 
- - Args: - file_path (str): - location of the csv file to load - - Returns: - property: delimiter - """ - sniffer = csv.Sniffer() - with open(file_path, encoding="utf-8") as fp: - first_line = fp.readline() - delimiter = sniffer.sniff(first_line).delimiter - return delimiter - - def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Sample: - """Convert a row from the dataset into a Sample for the NER task. - - Args: - row (Dict[str, List[str]]): - single row of the dataset - sent_index (int): position of the sentence - - Returns: - Sample: - row formatted into a Sample object - """ - text_col = self.column_map["text"] - - for key, value in row.items(): - if isinstance(value, str): - row[key] = eval(value) - - assert all(isinstance(value, list) for value in row.values()), ValueError( - f"Column ({sent_index}th) values should be list that contains tokens or labels. " - "Given CSV file has invalid values" - ) - token_num = len(row[text_col]) - assert all(len(value) == token_num for value in row.values()), ValueError( - f"Column ({sent_index}th) values should have same length with number of token in text, " - f"which is {token_num}" - ) - - original = " ".join(row[text_col]) - ner_labels = list() - cursor = 0 - for token_indx in range(len(row[text_col])): - token = row[text_col][token_indx] - ner_labels.append( - NERPrediction.from_span( - entity=row[self.column_map["ner"]][token_indx], - word=token, - start=cursor, - end=cursor + len(token), - pos_tag=row[self.column_map["pos"]][token_indx] - if row.get(self.column_map["pos"], None) - else None, - chunk_tag=row[self.column_map["chunk"]][token_indx] - if row.get(self.column_map["chunk"], None) - else None, - ) - ) - cursor += len(token) + 1 # +1 to account for the white space - - return NERSample( - original=original, expected_results=NEROutput(predictions=ner_labels) - ) - - def _row_to_seq_classification_sample(self, row: Dict[str, str]) -> Sample: - """Convert a row from the dataset into a 
Sample for the text-classification task - - Args: - row (Dict[str, str]): - single row of the dataset - - Returns: - Sample: - row formatted into a Sample object - """ - original = row[self.column_map["text"]] - # label score should be 1 since it is ground truth, required for __eq__ - label = SequenceLabel(label=row[self.column_map["label"]], score=1) - - return SequenceClassificationSample( - original=original, - expected_results=SequenceClassificationOutput(predictions=[label]), - ) - - def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: - """Helper function to map original column into standardized ones. - - Args: - column_names (List[str]): - list of column names of the csv file - - Returns: - Dict[str, str]: - mapping from the original column names into 'standardized' names - """ - column_map = {k: None for k in self.COLUMN_NAMES} - for c in column_names: - for key, reference_columns in self.COLUMN_NAMES.items(): - if c.lower() in reference_columns: - column_map[key] = c - - not_referenced_columns = { - k: self.COLUMN_NAMES[k] for k, v in column_map.items() if v is None - } - if "text" in not_referenced_columns and ( - "ner" in not_referenced_columns or "label" in not_referenced_columns - ): - raise OSError( - f"CSV file is invalid. CSV handler works with template column names!\n" - f"{', '.join(not_referenced_columns.keys())} column could not be found in header.\n" - f"You can use following namespaces:\n{not_referenced_columns}" - ) - return column_map - - def _import_data(self, file_name, **kwargs) -> List[Sample]: - """Helper function to import testcases from csv file after editing. 
- - Args: - file_name (str): path to the csv file - **kwargs: additional arguments to pass to pandas.read_csv - - Returns: - List[Sample]: list of samples - """ - data = pd.read_csv(file_name, **kwargs) - custom_names = { - "question-answering": "qa", - "text-classification": "sequenceclassification", - } - sample_models = { - k.lower(): v for k, v in sample.__dict__.items() if k.endswith("Sample") - } - samples = [] - - for i in data.to_dict(orient="records"): - if self.task in custom_names: - sample_name = custom_names[self.task] + "sample" - else: - sample_name = self.task.lower() + "sample" - samples.append(sample_models[sample_name](**i)) - return samples - - class JSONLDataset(_IDataset): """Class to handle JSONL datasets. Subclass of _IDataset.""" @@ -1088,8 +802,11 @@ def _row_to_sample_classification(self, data_row: Dict[str, str]) -> Sample: ) -class CustomCSVDataset(_IDataset): - """ +class CSVDataset(_IDataset): + supported_tasks = ["ner", "text-classification"] + COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} + + """ A class to handle CSV files datasets. Subclass of _IDataset. Attributes: @@ -1118,13 +835,200 @@ def __init__(self, file_path: str, task: str, **kwargs) -> None: super().__init__() self._file_path = file_path self.task = task - self.delimiter = self._find_delimiter(file_path) + if type(file_path) == dict: + self.delimiter = self._find_delimiter(file_path["name"]) + else: + if task in self.COLUMN_NAMES: + self.COLUMN_NAMES = self.COLUMN_NAMES[self.task] + elif "is_import" not in kwargs: + raise ValueError( + f"Given task ({task}) is not matched with template. \ + CSV dataset can ne only loaded for text-classification and ner!" 
+ ) + self.delimiter = self._find_delimiter(file_path) + + self.column_map = None + self.kwargs = kwargs + + def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: + """Loads data from a csv file into raw lists of strings + + Args: + standardize_columns (bool): whether to standardize column names + + Returns: + List[Dict]: + parsed CSV file into list of dicts + """ + df = pd.read_csv(self._file_path) + + raw_data = [] + if not standardize_columns: + data = df.to_dict(orient="records") + if self.task == "ner": + for row in data: + raw_data.append( + { + key: (val if isinstance(val, list) else eval(val)) + for key, val in row.items() + } + ) + return raw_data + return data + + for _, row in df.iterrows(): + if not self.column_map: + self.column_map = self._match_column_names(list(row.keys())) + + label_col = ( + self.column_map["ner"] if self.task == "ner" else self.column_map["label"] + ) + + text = row[self.column_map["text"]] + labels = row[label_col] + + raw_data.append( + { + "text": text + if (isinstance(text, list) or self.task != "ner") + else eval(text), + "labels": labels + if (isinstance(labels, list) or self.task != "ner") + else eval(labels), + } + ) + + return raw_data + + def load_data(self) -> List[Sample]: + if type(self._file_path) == dict: + dataset = pd.read_csv(self._file_path["name"]) + else: + dataset = pd.read_csv(self._file_path) + if not self.column_map: + self.column_map = self._match_column_names(list(dataset.columns)) + + task_function = getattr(self, f"load_data_{self.task}", None) + task_functions = { + "text-classification": self.load_data_classification, + "ner": self.load_data_ner, + "summarization": self.load_data_summarization, + "question-answering": self.load_data_question_answering, + } + + if self.task in task_functions: + task_function = task_functions[self.task] + return task_function(dataset) + else: + raise ValueError(f"Unsupported task: {self.task}") + + def export_data(self, data: List[Sample], 
output_path: str): + """Exports the data to the corresponding format and saves it to 'output_path'. + + Args: + data (List[Sample]): + data to export + output_path (str): + path to save the data to + """ + if self.task == "ner": + final_data = defaultdict(list) + for elt in data: + tokens, labels, testcase_tokens, testcase_labels = Formatter.process( + elt, output_format="csv" + ) + final_data["text"].append(tokens) + final_data["ner"].append(labels) + final_data["testcase_text"].append(testcase_tokens) + final_data["testcase_labels"].append(testcase_labels) + + if ( + sum([len(labels) for labels in final_data["testcase_labels"]]) + * sum([len(tokens) for tokens in final_data["testcase_text"]]) + == 0 + ): + final_data.pop("testcase_text") + final_data.pop("testcase_labels") + + pd.DataFrame(data=final_data).to_csv(output_path, index=False) + + elif self.task == "text-classification": + rows = [] + for s in data: + row = Formatter.process(s, output_format="csv") + rows.append(row) + + df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES.keys())) + df.to_csv(output_path, index=False, encoding="utf-8") + + def load_data_ner( + self, + dataset: pd.DataFrame, + ) -> List[Sample]: + samples = [] + for row_index, row in dataset.iterrows(): + samples.append(self._row_to_ner_sample(row.to_dict(), row_index)) + + return samples + + def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Sample: + """Convert a row from the dataset into a Sample for the NER task. + + Args: + row (Dict[str, List[str]]): + single row of the dataset + sent_index (int): position of the sentence + + Returns: + Sample: + row formatted into a Sample object + + """ + + text_col = self.column_map["text"] + + for key, value in row.items(): + if isinstance(value, str): + row[key] = eval(value) + + assert all(isinstance(value, list) for value in row.values()), ValueError( + f"Column ({sent_index}th) values should be list that contains tokens or labels. 
" + "Given CSV file has invalid values" + ) + token_num = len(row[text_col]) + assert all(len(value) == token_num for value in row.values()), ValueError( + f"Column ({sent_index}th) values should have same length with number of token in text, " + f"which is {token_num}" + ) + + original = " ".join(row[text_col]) + ner_labels = list() + cursor = 0 + for token_indx in range(len(row[text_col])): + token = row[text_col][token_indx] + ner_labels.append( + NERPrediction.from_span( + entity=row[self.column_map["ner"]][token_indx], + word=token, + start=cursor, + end=cursor + len(token), + pos_tag=row[self.column_map["pos"]][token_indx] + if row.get(self.column_map["pos"], None) + else None, + chunk_tag=row[self.column_map["chunk"]][token_indx] + if row.get(self.column_map["chunk"], None) + else None, + ) + ) + cursor += len(token) + 1 # +1 to account for the white space + + return NERSample( + original=original, expected_results=NEROutput(predictions=ner_labels) + ) def load_data_classification( self, dataset: pd.DataFrame, - feature_column: str = "text", - target_column: str = "label", ) -> List[Sample]: """ Load the specified split from the dataset library for classification task. @@ -1144,11 +1048,14 @@ def load_data_classification( Loaded split as a list of Sample objects, where each Sample object consists of an input text and its corresponding label. 
""" + if type(self._file_path) == dict: + feature_column = (self._file_path.get("feature_column", "text"),) + target_column = (self._file_path.get("target_column", "label"),) - if feature_column and target_column: - dataset.rename( - columns={feature_column: "text", target_column: "label"}, inplace=True - ) + if feature_column and target_column: + dataset.rename( + columns={feature_column: "text", target_column: "label"}, inplace=True + ) samples = [ self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows() @@ -1158,8 +1065,6 @@ def load_data_classification( def load_data_summarization( self, dataset: pd.DataFrame, - feature_column: str = "document", - target_column: str = "summary", ) -> List[Sample]: """ Load the specified split from the dataset library for summarization task. @@ -1179,12 +1084,13 @@ def load_data_summarization( Loaded split as a list of Sample objects for summarization task, where each Sample object contains a document and its corresponding summary. """ + feature_column = self._file_path.get("feature_column", "document") + target_column = self._file_path.get("target_column", "summary") + + dataset.rename( + columns={feature_column: "document", target_column: "summary"}, inplace=True + ) - if feature_column and target_column: - dataset.rename( - columns={feature_column: "document", target_column: "summary"}, - inplace=True, - ) samples = [ self._row_to_sample_summarization(row) for _, row in dataset.iterrows() ] @@ -1193,8 +1099,6 @@ def load_data_summarization( def load_data_question_answering( self, dataset: pd.DataFrame, - feature_column: dict = {"passage": "passage", "question": "question"}, - target_column: str = "answer", ) -> List[Sample]: """ Load the specified split from the dataset library for question-answering task. 
@@ -1214,19 +1118,23 @@ def load_data_question_answering( Loaded split as a list of QASample objects for question-answering task, where each QASample object contains an original question, original context (passage), and the task name. """ + feature_column = self._file_path.get( + "feature_column", {"passage": "passage", "question": "question"} + ) + target_column = self._file_path.get("target_column", "answer") + passage_column = feature_column.get("passage") question_column = feature_column.get("question") - if feature_column and target_column: - if passage_column in dataset.columns: - dataset.rename(columns={passage_column: "passage"}, inplace=True) - else: - dataset["passage"] = "-" + if passage_column in dataset.columns: + dataset.rename(columns={passage_column: "passage"}, inplace=True) + else: + dataset["passage"] = "-" - if question_column in dataset.columns: - dataset.rename(columns={question_column: "question"}, inplace=True) + if question_column in dataset.columns: + dataset.rename(columns={question_column: "question"}, inplace=True) - dataset.rename(columns={target_column: "answer"}, inplace=True) + dataset.rename(columns={target_column: "answer"}, inplace=True) samples = [ self._row_to_sample_question_answering(row) for _, row in dataset.iterrows() @@ -1254,62 +1162,6 @@ def _row_to_sample_question_answering(self, row: pd.Series) -> QASample: task="question-answering", ) - def load_raw_data(self) -> List[Dict]: - """Loads data from a csv file into raw lists of strings - Returns: - List[Dict]: - parsed CSV file into list of dicts - """ - df = pd.read_csv(self._file_path) - return df.to_dict(orient="records") - - def load_data(self, feature_column: str, target_column: str) -> List[Sample]: - """ - Load the specified split from the dataset library based on the task. - - Args: - feature_column (str): - Name of the column in the dataset containing the input text data for classification - or the input document data for summarization. 
- target_column (str): - Name of the column in the dataset containing the target labels for classification - or the target summaries for summarization. - - Returns: - List[Sample]: - Loaded split as a list of Sample objects based on the specified task. For text - classification task, each Sample object consists of an input text and its - corresponding label. For summarization task, each Sample object contains a - document and its corresponding summary. - - Raises: - ValueError: - If the specified task is not supported or recognized. Currently supported tasks - include "text-classification" and "summarization". - """ - dataset = pd.read_csv(self._file_path, delimiter=self.delimiter) - - if self.task == "text-classification": - return self.load_data_classification( - dataset, - feature_column, - target_column, - ) - elif self.task == "summarization": - return self.load_data_summarization( - dataset, - feature_column, - target_column, - ) - elif self.task == "question-answering": - return self.load_data_question_answering( - dataset, - feature_column, - target_column, - ) - else: - raise ValueError(f"Unsupported task: {self.task}") - def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: """ Convert a row from the dataset into a Sample for the text-classification task @@ -1322,8 +1174,13 @@ def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: Sample: Row formatted into a Sample object """ - original = row.loc["text"] - label = SequenceLabel(label=row.loc["label"], score=1) + if type(self._file_path) == dict: + original = row.loc["text"] + label = SequenceLabel(label=row.loc["label"], score=1) + else: + original = row[self.column_map["text"]] + # label score should be 1 since it is ground truth, required for __eq__ + label = SequenceLabel(label=row[self.column_map["label"]], score=1) return SequenceClassificationSample( original=original, @@ -1349,26 +1206,6 @@ def _row_to_sample_summarization(self, row: pd.Series) -> Sample: 
original=original, expected_results=summary, task="summarization" ) - def export_data(self, data: List[Sample], output_path: str): - """ - Exports the data to the corresponding format and saves it to 'output_path'. - - Args: - data (List[Sample]): - Data to export. - output_path (str): - Path to save the data to. - """ - rows = [] - for s in data: - row = Formatter.process(s, output_format="csv") - rows.append(row) - - df = pd.DataFrame( - rows, columns=list(self.COLUMN_NAMES["text-classification"].keys()) - ) - df.to_csv(output_path, index=False, encoding="utf-8") - @staticmethod def _find_delimiter(file_path: str) -> property: """ @@ -1384,3 +1221,33 @@ def _find_delimiter(file_path: str) -> property: first_line = fp.readline() delimiter = sniffer.sniff(first_line).delimiter return delimiter + + def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: + """Helper function to map original column into standardized ones. + + Args: + column_names (List[str]): + list of column names of the csv file + + Returns: + Dict[str, str]: + mapping from the original column names into 'standardized' names + """ + column_map = {k: None for k in self.COLUMN_NAMES} + for c in column_names: + for key, reference_columns in self.COLUMN_NAMES.items(): + if c.lower() in reference_columns: + column_map[key] = c + + not_referenced_columns = { + k: self.COLUMN_NAMES[k] for k, v in column_map.items() if v is None + } + if "text" in not_referenced_columns and ( + "ner" in not_referenced_columns or "label" in not_referenced_columns + ): + raise OSError( + f"CSV file is invalid. 
CSV handler works with template column names!\n" + f"{', '.join(not_referenced_columns.keys())} column could not be found in header.\n" + f"You can use following namespaces:\n{not_referenced_columns}" + ) + return column_map diff --git a/langtest/langtest.py b/langtest/langtest.py index 83ad9ade1..0fa3f6bd6 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -11,7 +11,7 @@ from langtest.utils.custom_types.sample import RuntimeSample from .augmentation import AugmentRobustness, TemplaticAugment -from .datahandler.datasource import DataFactory, HuggingFaceDataset, CustomCSVDataset +from .datahandler.datasource import DataFactory, HuggingFaceDataset from .modelhandler import LANGCHAIN_HUBS, ModelFactory from .transform import TestFactory from .transform.utils import RepresentationOperation @@ -142,74 +142,40 @@ def __init__( elif ( type(data) is dict + and not data["name"].endswith(".csv") and hub in self.SUPPORTED_HUBS_HF_DATASET_CLASSIFICATION and task == "text-classification" ): - if not data["name"].endswith(".csv"): - self.data = ( - HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "text"), - target_column=data.get("target_column", "label"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) - if data is not None - else None + self.data = ( + HuggingFaceDataset(data["name"], task=task).load_data( + feature_column=data.get("feature_column", "text"), + target_column=data.get("target_column", "label"), + split=data.get("split", "test"), + subset=data.get("subset", None), ) + if data is not None + else None + ) - if hub == "spacy" and (model == "textcat_imdb" or model is None): - if model is None: - logging.warning( - "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." 
- ) - model = resource_filename("langtest", "data/textcat_imdb") - elif data["name"].endswith(".csv"): - self.data = ( - CustomCSVDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "text"), - target_column=data.get("target_column", "label"), + if hub == "spacy" and (model == "textcat_imdb" or model is None): + if model is None: + logging.warning( + "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." ) - if data is not None - else None - ) - - if hub == "spacy" and (model == "textcat_imdb" or model is None): - if model is None: - logging.warning( - "Using the default 'textcat_imdb' model for Spacy hub. Please provide a custom model path if desired." - ) - model = resource_filename("langtest", "data/textcat_imdb") + model = resource_filename("langtest", "data/textcat_imdb") elif ( type(data) is dict + and not data["name"].endswith(".csv") and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION and task == "summarization" ): - if not data["name"].endswith(".csv"): - self.data = HuggingFaceDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "document"), - target_column=data.get("target_column", "summary"), - split=data.get("split", "test"), - subset=data.get("subset", None), - ) - elif data["name"].endswith(".csv"): - self.data = CustomCSVDataset(data["name"], task=task).load_data( - feature_column=data.get("feature_column", "document"), - target_column=data.get("target_column", "summary"), - ) - - elif ( - type(data) is dict - and hub in self.SUPPORTED_HUBS_HF_DATASET_SUMMARIZATION - and task == "question-answering" - ): - if data["name"].endswith(".csv"): - self.data = CustomCSVDataset(data["name"], task=task).load_data( - feature_column=data.get( - "feature_column", {"passage": "passage", "question": "question"} - ), - target_column=data.get("target_column", "answer"), - ) + self.data = HuggingFaceDataset(data["name"], task=task).load_data( + 
feature_column=data.get("feature_column", "document"), + target_column=data.get("target_column", "summary"), + split=data.get("split", "test"), + subset=data.get("subset", None), + ) elif data is None and (task, model, hub) not in self.DEFAULTS_DATASET.keys(): raise ValueError( From 234177f301743443d11a729850309a19ba22fa72 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 28 Jul 2023 12:22:08 +0530 Subject: [PATCH 12/24] tests\test_datasource.py reformatted --- tests/test_datasource.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_datasource.py b/tests/test_datasource.py index 332b97b77..4c4719632 100644 --- a/tests/test_datasource.py +++ b/tests/test_datasource.py @@ -207,9 +207,7 @@ class TestSummarizationDataset: def test_load_raw_data(self, dataset, feature_col, target_col): """""" if isinstance(dataset, HuggingFaceDataset): - raw_data = dataset.load_raw_data( - split="test[:30]" - ) + raw_data = dataset.load_raw_data(split="test[:30]") else: raw_data = dataset.load_raw_data() From 30add6667c61a21be4b7b599b4ab5a9f702f6654 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 28 Jul 2023 13:57:36 +0530 Subject: [PATCH 13/24] datasource.py updated --- langtest/datahandler/datasource.py | 35 +++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index edc94c99c..194b225b8 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -9,6 +9,7 @@ import jsonlines import pandas as pd +from langtest.utils.custom_types import sample from langtest.utils.custom_types.sample import ToxicitySample, TranslationSample from .format import Formatter from ..utils.custom_types import ( @@ -901,6 +902,11 @@ def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: return raw_data def load_data(self) -> List[Sample]: + if self.kwargs.get("is_import", False): + kwargs = self.kwargs.copy() 
+ kwargs.pop("is_import") + return self._import_data(self._file_path, **kwargs) + if type(self._file_path) == dict: dataset = pd.read_csv(self._file_path["name"]) else: @@ -908,7 +914,6 @@ def load_data(self) -> List[Sample]: if not self.column_map: self.column_map = self._match_column_names(list(dataset.columns)) - task_function = getattr(self, f"load_data_{self.task}", None) task_functions = { "text-classification": self.load_data_classification, "ner": self.load_data_ner, @@ -1251,3 +1256,31 @@ def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: f"You can use following namespaces:\n{not_referenced_columns}" ) return column_map + + def _import_data(self, file_name, **kwargs) -> List[Sample]: + """Helper function to import testcases from csv file after editing. + + Args: + file_name (str): path to the csv file + **kwargs: additional arguments to pass to pandas.read_csv + + Returns: + List[Sample]: list of samples + """ + data = pd.read_csv(file_name, **kwargs) + custom_names = { + "question-answering": "qa", + "text-classification": "sequenceclassification", + } + sample_models = { + k.lower(): v for k, v in sample.__dict__.items() if k.endswith("Sample") + } + samples = [] + + for i in data.to_dict(orient="records"): + if self.task in custom_names: + sample_name = custom_names[self.task] + "sample" + else: + sample_name = self.task.lower() + "sample" + samples.append(sample_models[sample_name](**i)) + return samples From c4692beaaf7d6660d1d31c745fd5db05244af71d Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 30 Jul 2023 05:04:36 +0530 Subject: [PATCH 14/24] re-arranged code and directly load csv for summarization, question-answering --- langtest/datahandler/datasource.py | 1417 ++++++++++++++-------------- 1 file changed, 733 insertions(+), 684 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 194b225b8..ace5a91e8 100644 --- a/langtest/datahandler/datasource.py +++ 
b/langtest/datahandler/datasource.py @@ -4,7 +4,7 @@ import os import re from abc import ABC, abstractmethod -from typing import Dict, List +from typing import Dict, List, Union import jsonlines import pandas as pd @@ -392,168 +392,156 @@ def export_data(self, data: List[Sample], output_path: str): fwriter.write(bytes(otext, encoding="utf-8")) -class JSONDataset(_IDataset): - """Class to handle JSON dataset files. Subclass of _IDataset.""" - - def __init__(self, file_path: str): - """Initializes JSONDataset object. - - Args: - file_path (str): Path to the data file. - """ - super().__init__() - self._file_path = file_path - - def load_raw_data(self): - """Loads data into a raw list""" - raise NotImplementedError() - - def load_data(self) -> List[Sample]: - """Loads data into a list of Sample - - Returns: - List[Sample]: formatted samples - """ - raise NotImplementedError() - - def export_data(self, data: List[Sample], output_path: str): - """Exports the data to the corresponding format and saves it to 'output_path'. - - Args: - data (List[Sample]): - data to export - output_path (str): - path to save the data to - """ - raise NotImplementedError() - - -class JSONLDataset(_IDataset): - """Class to handle JSONL datasets. Subclass of _IDataset.""" - +class CSVDataset(_IDataset): supported_tasks = [ "ner", "text-classification", - "question-answering", "summarization", - "toxicity", - "translation", + "question-answering", ] COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} - def __init__(self, file_path: str, task: str) -> None: - """Initializes JSONLDataset object. + """ + A class to handle CSV files datasets. Subclass of _IDataset. + + Attributes: + _file_path (Union[str, Dict]): + The path to the data file or a dictionary containing "name" key with the path. + task (str): + Specifies the task of the dataset, which can be either "text-classification","ner" + "question-answering" and "summarization". 
+ delimiter (str): + The delimiter used in the CSV file to separate columns (only for file_path as str). + """ + + def __init__(self, file_path: Union[str, Dict], task: str, **kwargs) -> None: + """ + Initializes a CustomCSVDataset object. Args: - file_path (str): Path to the data file. - task (str): name of the task to perform + file_path (Union[str, Dict]): + The path to the data file or a dictionary containing "name" key with the path + task (str): + Specifies the task of the dataset, which can be either "text-classification","ner" + "question-answering" and "summarization". + **kwargs: + Additional keyword arguments that can be used to configure the dataset (optional). """ super().__init__() self._file_path = file_path self.task = task - self.column_matcher = None + if type(file_path) == dict: + self.delimiter = self._find_delimiter(file_path["name"]) + else: + if task in self.COLUMN_NAMES: + self.COLUMN_NAMES = self.COLUMN_NAMES[self.task] + elif "is_import" not in kwargs: + raise ValueError( + f"Given task ({task}) is not matched with template. \ + CSV dataset can ne only loaded for text-classification and ner!" + ) + self.delimiter = self._find_delimiter(file_path) - def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: - """Helper function to map original column into standardized ones. 
+ self.column_map = None + self.kwargs = kwargs + + def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: + """Loads data from a csv file into raw lists of strings Args: - column_names (List[str]): - list of column names of the csv file + standardize_columns (bool): whether to standardize column names Returns: - Dict[str, str]: - mapping from the original column names into 'standardized' names + List[Dict]: + parsed CSV file into list of dicts """ - column_map = {} - for column in column_names: - for key, reference_columns in self.COLUMN_NAMES[self.task].items(): - if column.lower() in reference_columns: - column_map[key] = column + df = pd.read_csv(self._file_path) - not_referenced_columns = [ - col for col in self.COLUMN_NAMES[self.task] if col not in column_map - ] + raw_data = [] + if not standardize_columns: + data = df.to_dict(orient="records") + if self.task == "ner": + for row in data: + raw_data.append( + { + key: (val if isinstance(val, list) else eval(val)) + for key, val in row.items() + } + ) + return raw_data + return data - if "text" in not_referenced_columns: - raise OSError( - f"Your dataset needs to have at least have a column with one of the following name: " - f"{self.COLUMN_NAMES[self.task]['text']}, found: {column_names}." 
+ for _, row in df.iterrows(): + if not self.column_map: + self.column_map = self._match_column_names(list(row.keys())) + + label_col = ( + self.column_map["ner"] if self.task == "ner" else self.column_map["label"] ) - for missing_col in not_referenced_columns: - column_map[missing_col] = None - return column_map + text = row[self.column_map["text"]] + labels = row[label_col] - def load_raw_data(self) -> List[Dict]: - """Loads data from a JSON file into a list""" - with jsonlines.open(self._file_path) as reader: - data = [obj for obj in reader] - return data + raw_data.append( + { + "text": text + if (isinstance(text, list) or self.task != "ner") + else eval(text), + "labels": labels + if (isinstance(labels, list) or self.task != "ner") + else eval(labels), + } + ) + + return raw_data def load_data(self) -> List[Sample]: - """Loads data from a JSONL file and format it into a list of Sample. + """ + Load data from a CSV file and preprocess it based on the specified task. Returns: - list[Sample]: Loaded text data. - """ - data = [] - with jsonlines.open(self._file_path) as reader: - for item in reader: - if self.column_matcher is None: - self.column_matcher = self._match_column_names(item.keys()) + List[Sample]: A list of preprocessed data samples. - if self.task == "question-answering": - expected_results = item.get(self.column_matcher["answer"]) - if isinstance(expected_results, str) or isinstance( - expected_results, bool - ): - expected_results = [str(expected_results)] + Raises: + ValueError: If the specified task is unsupported. 
- data.append( - QASample( - original_question=item[self.column_matcher["text"]], - original_context=item.get( - self.column_matcher["context"], "-" - ), - expected_results=expected_results, - task=self.task, - dataset_name=self._file_path.split("/")[-2], - ) - ) + Note: + - If 'is_import' is set to True in the constructor's keyword arguments, + the data will be imported using the specified 'file_path' and optional + 'column_map' for renaming columns. - elif self.task == "summarization": - expected_results = item.get(self.column_matcher["summary"]) - if isinstance(expected_results, str) or isinstance( - expected_results, bool - ): - expected_results = [str(expected_results)] - data.append( - SummarizationSample( - original=item[self.column_matcher["text"]], - expected_results=expected_results, - task=self.task, - dataset_name=self._file_path.split("/")[-2], - ) - ) - elif self.task == "toxicity": - data.append( - ToxicitySample( - prompt=item[self.column_matcher["text"]], - task=self.task, - dataset_name=self._file_path.split("/")[-2], - ) - ) + - If 'is_import' is set to False (default), the data will be loaded from + a CSV file specified in 'file_path', and the 'column_map' will be + automatically matched with the dataset columns. - elif self.task == "translation": - data.append( - TranslationSample( - original=item[self.column_matcher["text"]], - task=self.task, - dataset_name=self._file_path.split("/")[-2], - ) - ) + - The supported task types are: 'text-classification', 'ner', + 'summarization', and 'question-answering'. The appropriate task-specific + loading function will be invoked to preprocess the data. 
+ """ + if self.kwargs.get("is_import", False): + kwargs = self.kwargs.copy() + kwargs.pop("is_import") + return self._import_data(self._file_path, **kwargs) - return data + if type(self._file_path) == dict: + dataset = pd.read_csv(self._file_path["name"]) + else: + dataset = pd.read_csv(self._file_path) + if not self.column_map: + self.column_map = self._match_column_names(list(dataset.columns)) + + task_functions = { + "text-classification": self.load_data_classification, + "ner": self.load_data_ner, + "summarization": self.load_data_summarization, + "question-answering": self.load_data_question_answering, + } + + if self.task in task_functions: + task_function = task_functions[self.task] + return task_function(dataset) + else: + raise ValueError(f"Unsupported task: {self.task}") def export_data(self, data: List[Sample], output_path: str): """Exports the data to the corresponding format and saves it to 'output_path'. @@ -564,430 +552,202 @@ def export_data(self, data: List[Sample], output_path: str): output_path (str): path to save the data to """ - raise NotImplementedError() + if self.task == "ner": + final_data = defaultdict(list) + for elt in data: + tokens, labels, testcase_tokens, testcase_labels = Formatter.process( + elt, output_format="csv" + ) + final_data["text"].append(tokens) + final_data["ner"].append(labels) + final_data["testcase_text"].append(testcase_tokens) + final_data["testcase_labels"].append(testcase_labels) + if ( + sum([len(labels) for labels in final_data["testcase_labels"]]) + * sum([len(tokens) for tokens in final_data["testcase_text"]]) + == 0 + ): + final_data.pop("testcase_text") + final_data.pop("testcase_labels") -class HuggingFaceDataset(_IDataset): - """Example dataset class that loads data using the Hugging Face dataset library.""" + pd.DataFrame(data=final_data).to_csv(output_path, index=False) - supported_tasks = ["text-classification", "summarization"] + elif self.task == "text-classification": + rows = [] + for s in data: + 
row = Formatter.process(s, output_format="csv") + rows.append(row) - LIB_NAME = "datasets" - COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} - - def __init__(self, dataset_name: str, task: str): - """Initialize the HuggingFaceDataset class. + df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES.keys())) + df.to_csv(output_path, index=False, encoding="utf-8") + @staticmethod + def _find_delimiter(file_path: str) -> property: + """ + Helper function in charge of finding the delimiter character in a csv file. Args: - dataset_name (str): - Name of the dataset to load. - task (str): - Task to be evaluated on. + file_path (str): + location of the csv file to load + Returns: + property: """ - self.dataset_name = dataset_name - self.task = task - self._check_datasets_package() + sniffer = csv.Sniffer() + with open(file_path, encoding="utf-8") as fp: + first_line = fp.readline() + delimiter = sniffer.sniff(first_line).delimiter + return delimiter - def _check_datasets_package(self): - """Check if the 'datasets' package is installed and import the load_dataset function. + def load_data_ner( + self, + dataset: pd.DataFrame, + ) -> List[Sample]: + """ + Preprocess data for Named Entity Recognition (NER) task. + + Args: + dataset (pd.DataFrame): Input data in DataFrame format. + + Returns: + List[Sample]: Preprocessed data samples for NER task. - Raises an error if the package is not found. """ - if try_import_lib(self.LIB_NAME): - dataset_module = importlib.import_module(self.LIB_NAME) - self.load_dataset = getattr(dataset_module, "load_dataset") - else: - raise ModuleNotFoundError( - f"The '{self.LIB_NAME}' package is not installed. Please install it using 'pip install {self.LIB_NAME}'." 
- ) + samples = [] + for row_index, row in dataset.iterrows(): + samples.append(self._row_to_ner_sample(row.to_dict(), row_index)) + + return samples def load_data_classification( self, - feature_column: str = "text", - target_column: str = "label", - split: str = "test", - subset: str = None, + dataset: pd.DataFrame, ) -> List[Sample]: - """Load the specified split from the dataset library. + """ + Load the specified split from the dataset library for classification task. Args: - feature_column (str): - Name of the feature_column column. - target_column (str): - Name of the target_column column. - split (str): - Name of the split to load (e.g., train, validation, test). - subset (str): - Name of the configuration. + dataset (pd.DataFrame): + The input dataset containing the text data and corresponding labels. + feature_column (str, optional): + Name of the column in the dataset containing the input text data. + Default is "text". + target_column (str, optional): + Name of the column in the dataset containing the target labels for classification. + Default is "label". Returns: List[Sample]: - Loaded split as a list of Sample objects. + Loaded split as a list of Sample objects, where each Sample object consists + of an input text and its corresponding label. 
""" - if subset: - dataset = self.load_dataset(self.dataset_name, name=subset, split=split) - else: - dataset = self.load_dataset(self.dataset_name, split=split) + if type(self._file_path) == dict: + feature_column = (self._file_path.get("feature_column", "text"),) + target_column = (self._file_path.get("target_column", "label"),) - if feature_column and target_column: - dataset = dataset.map( - lambda example: { - "text": example[feature_column], - "label": example[target_column], - } - ) + if feature_column and target_column: + dataset.rename( + columns={feature_column: "text", target_column: "label"}, inplace=True + ) - samples = [self._row_to_sample_classification(example) for example in dataset] + samples = [ + self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows() + ] return samples def load_data_summarization( self, - feature_column: str = "document", - target_column: str = "summary", - split: str = "test", - subset: str = None, + dataset: pd.DataFrame, ) -> List[Sample]: - """Load the specified split from the dataset library for summarization task. + """ + Load the specified split from the dataset library for summarization task. Args: - feature_column (str): - Name of the column containing the input text or document. - target_column (str): - Name of the column containing the target summary. - split (str): - Name of the split to load (e.g., train, validation, test). - subset (str): - Name of the configuration or subset to load. + dataset (pd.DataFrame): + The input dataset containing the document data and corresponding summaries. + feature_column (str, optional): + Name of the column in the dataset containing the input document data. + Default is "document". + target_column (str, optional): + Name of the column in the dataset containing the target summaries for summarization. + Default is "summary". Returns: List[Sample]: - Loaded split as a list of Sample objects for summarization task. 
+ Loaded split as a list of Sample objects for summarization task, where each + Sample object contains a document and its corresponding summary. """ - if subset: - dataset = self.load_dataset(self.dataset_name, name=subset, split=split) - else: - dataset = self.load_dataset(self.dataset_name, split=split) + if type(self._file_path) == dict: + feature_column = self._file_path.get("feature_column", "document") + target_column = self._file_path.get("target_column", "summary") - if feature_column and target_column: - dataset = dataset.map( - lambda example: { - "document": example[feature_column], - "summary": example[target_column], - } + dataset.rename( + columns={feature_column: "document", target_column: "summary"}, + inplace=True, ) - samples = [self._row_to_sample_summarization(example) for example in dataset] + samples = [ + self._row_to_sample_summarization(row) for _, row in dataset.iterrows() + ] return samples - def load_raw_data( - self, - split: str = "test", - subset: str = None, - ) -> List: - """Loads data into a list""" - if subset: - dataset = self.load_dataset(self.dataset_name, name=subset, split=split) - else: - dataset = self.load_dataset(self.dataset_name, split=split) - - return dataset.to_list() - - def load_data( + def load_data_question_answering( self, - feature_column: str = "text", - target_column: str = "label", - split: str = "test", - subset: str = None, + dataset: pd.DataFrame, ) -> List[Sample]: - """Load the specified data based on the task. + """ + Load the specified split from the dataset library for question-answering task. Args: - feature_column (str): - Name of the column containing the input text or document. - target_column (str): - Name of the column containing the target label or summary. - split (str): - Name of the split to load (e.g., train, validation, test). - subset (str): - Name of the configuration or subset to load. 
+ dataset (pd.DataFrame): + The input dataset containing the passage, question, and corresponding answers. + feature_column (dict, optional): + Dictionary of column names in the dataset containing the input passage and question data. + Default is {"passage": "passage", "question": "question"}. + target_column (str, optional): + Name of the column in the dataset containing the target answers for question-answering. + Default is "answer". Returns: - List[Sample]: - Loaded data as a list of Sample objects. - - Raises: - ValueError: - If an unsupported task is provided. + List[QASample]: + Loaded split as a list of QASample objects for question-answering task, where each + QASample object contains an original question, original context (passage), and the task name. """ - if self.task == "text-classification": - return self.load_data_classification( - feature_column, target_column, split, subset - ) - elif self.task == "summarization": - return self.load_data_summarization( - feature_column, target_column, split, subset + if type(self._file_path) == dict: + feature_column = self._file_path.get( + "feature_column", {"passage": "passage", "question": "question"} ) - else: - raise ValueError(f"Unsupported task: {self.task}") - - @staticmethod - def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: - """Convert a row from the dataset into a Sample for summarization. - - Args: - data_row (Dict[str, str]): - Single row of the dataset. + target_column = self._file_path.get("target_column", "answer") - Returns: - Sample: - Row formatted into a Sample object for summarization. 
- """ - original = data_row.get("document", "") - summary = data_row.get("summary", "") + passage_column = feature_column.get("passage") + question_column = feature_column.get("question") - return SummarizationSample( - original=original, expected_results=summary, task="summarization" - ) + if passage_column in dataset.columns: + dataset.rename(columns={passage_column: "passage"}, inplace=True) + else: + dataset["passage"] = "-" - def export_data(self, data: List[Sample], output_path: str): - """Exports the data to the corresponding format and saves it to 'output_path'. + if question_column in dataset.columns: + dataset.rename(columns={question_column: "question"}, inplace=True) - Args: - data (List[Sample]): - Data to export. - output_path (str): - Path to save the data to. - """ - rows = [] - for s in data: - row = Formatter.process(s, output_format="csv") - rows.append(row) + dataset.rename(columns={target_column: "answer"}, inplace=True) - df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES[self.task].keys())) - df.to_csv(output_path, index=False, encoding="utf-8") + samples = [ + self._row_to_sample_question_answering(row) for _, row in dataset.iterrows() + ] + return samples - def _row_to_sample_classification(self, data_row: Dict[str, str]) -> Sample: - """Convert a row from the dataset into a Sample for text classification. + def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Sample: + """Convert a row from the dataset into a Sample for the NER task. Args: - data_row (Dict[str, str]): - Single row of the dataset. + row (Dict[str, List[str]]): + single row of the dataset + sent_index (int): position of the sentence Returns: Sample: - Row formatted into a Sample object. 
- """ - input_column = next( - ( - col - for col in self.COLUMN_NAMES["text-classification"]["text"] - if col in data_row - ), - None, - ) - output_column = next( - ( - col - for col in self.COLUMN_NAMES["text-classification"]["label"] - if col in data_row - ), - None, - ) - - original = data_row.get(input_column, "") - label = SequenceLabel(label=data_row.get(output_column, ""), score=1) - - return SequenceClassificationSample( - original=original, - expected_results=SequenceClassificationOutput(predictions=[label]), - ) - - -class CSVDataset(_IDataset): - supported_tasks = ["ner", "text-classification"] - COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} - - """ - A class to handle CSV files datasets. Subclass of _IDataset. - - Attributes: - _file_path (str): - The path to the data file. - task (str): - Specifies the task of the dataset, which can be either "text-classification" - or "summarization". - delimiter (str): - The delimiter used in the CSV file to separate columns. - """ - - def __init__(self, file_path: str, task: str, **kwargs) -> None: - """ - Initializes a CustomCSVDataset object. - - Args: - file_path (str): - The path to the data file containing the CSV data. - task (str): - Specifies the task of the dataset, which can be either "text-classification" - or "summarization". - **kwargs: - Additional keyword arguments that can be used to configure the dataset (optional). - """ - super().__init__() - self._file_path = file_path - self.task = task - if type(file_path) == dict: - self.delimiter = self._find_delimiter(file_path["name"]) - else: - if task in self.COLUMN_NAMES: - self.COLUMN_NAMES = self.COLUMN_NAMES[self.task] - elif "is_import" not in kwargs: - raise ValueError( - f"Given task ({task}) is not matched with template. \ - CSV dataset can ne only loaded for text-classification and ner!" 
- ) - self.delimiter = self._find_delimiter(file_path) - - self.column_map = None - self.kwargs = kwargs - - def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: - """Loads data from a csv file into raw lists of strings - - Args: - standardize_columns (bool): whether to standardize column names - - Returns: - List[Dict]: - parsed CSV file into list of dicts - """ - df = pd.read_csv(self._file_path) - - raw_data = [] - if not standardize_columns: - data = df.to_dict(orient="records") - if self.task == "ner": - for row in data: - raw_data.append( - { - key: (val if isinstance(val, list) else eval(val)) - for key, val in row.items() - } - ) - return raw_data - return data - - for _, row in df.iterrows(): - if not self.column_map: - self.column_map = self._match_column_names(list(row.keys())) - - label_col = ( - self.column_map["ner"] if self.task == "ner" else self.column_map["label"] - ) - - text = row[self.column_map["text"]] - labels = row[label_col] - - raw_data.append( - { - "text": text - if (isinstance(text, list) or self.task != "ner") - else eval(text), - "labels": labels - if (isinstance(labels, list) or self.task != "ner") - else eval(labels), - } - ) - - return raw_data - - def load_data(self) -> List[Sample]: - if self.kwargs.get("is_import", False): - kwargs = self.kwargs.copy() - kwargs.pop("is_import") - return self._import_data(self._file_path, **kwargs) - - if type(self._file_path) == dict: - dataset = pd.read_csv(self._file_path["name"]) - else: - dataset = pd.read_csv(self._file_path) - if not self.column_map: - self.column_map = self._match_column_names(list(dataset.columns)) - - task_functions = { - "text-classification": self.load_data_classification, - "ner": self.load_data_ner, - "summarization": self.load_data_summarization, - "question-answering": self.load_data_question_answering, - } - - if self.task in task_functions: - task_function = task_functions[self.task] - return task_function(dataset) - else: - raise 
ValueError(f"Unsupported task: {self.task}") - - def export_data(self, data: List[Sample], output_path: str): - """Exports the data to the corresponding format and saves it to 'output_path'. - - Args: - data (List[Sample]): - data to export - output_path (str): - path to save the data to - """ - if self.task == "ner": - final_data = defaultdict(list) - for elt in data: - tokens, labels, testcase_tokens, testcase_labels = Formatter.process( - elt, output_format="csv" - ) - final_data["text"].append(tokens) - final_data["ner"].append(labels) - final_data["testcase_text"].append(testcase_tokens) - final_data["testcase_labels"].append(testcase_labels) - - if ( - sum([len(labels) for labels in final_data["testcase_labels"]]) - * sum([len(tokens) for tokens in final_data["testcase_text"]]) - == 0 - ): - final_data.pop("testcase_text") - final_data.pop("testcase_labels") - - pd.DataFrame(data=final_data).to_csv(output_path, index=False) - - elif self.task == "text-classification": - rows = [] - for s in data: - row = Formatter.process(s, output_format="csv") - rows.append(row) - - df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES.keys())) - df.to_csv(output_path, index=False, encoding="utf-8") - - def load_data_ner( - self, - dataset: pd.DataFrame, - ) -> List[Sample]: - samples = [] - for row_index, row in dataset.iterrows(): - samples.append(self._row_to_ner_sample(row.to_dict(), row_index)) - - return samples - - def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Sample: - """Convert a row from the dataset into a Sample for the NER task. 
- - Args: - row (Dict[str, List[str]]): - single row of the dataset - sent_index (int): position of the sentence - - Returns: - Sample: - row formatted into a Sample object - + row formatted into a Sample object + """ text_col = self.column_map["text"] @@ -1031,170 +791,480 @@ def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Samp original=original, expected_results=NEROutput(predictions=ner_labels) ) - def load_data_classification( - self, - dataset: pd.DataFrame, - ) -> List[Sample]: + def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: """ - Load the specified split from the dataset library for classification task. + Convert a row from the dataset into a Sample for the text-classification task Args: - dataset (pd.DataFrame): - The input dataset containing the text data and corresponding labels. - feature_column (str, optional): - Name of the column in the dataset containing the input text data. - Default is "text". - target_column (str, optional): - Name of the column in the dataset containing the target labels for classification. - Default is "label". + row (pd.Series): + Single row of the dataset as a Pandas Series Returns: - List[Sample]: - Loaded split as a list of Sample objects, where each Sample object consists - of an input text and its corresponding label. 
+ Sample: + Row formatted into a Sample object """ if type(self._file_path) == dict: - feature_column = (self._file_path.get("feature_column", "text"),) - target_column = (self._file_path.get("target_column", "label"),) - - if feature_column and target_column: - dataset.rename( - columns={feature_column: "text", target_column: "label"}, inplace=True - ) + original = row.loc["text"] + label = SequenceLabel(label=row.loc["label"], score=1) + else: + original = row[self.column_map["text"]] + # label score should be 1 since it is ground truth, required for __eq__ + label = SequenceLabel(label=row[self.column_map["label"]], score=1) - samples = [ - self._row_to_seq_classification_sample(row) for _, row in dataset.iterrows() - ] - return samples + return SequenceClassificationSample( + original=original, + expected_results=SequenceClassificationOutput(predictions=[label]), + ) - def load_data_summarization( - self, - dataset: pd.DataFrame, - ) -> List[Sample]: + def _row_to_sample_summarization(self, row: pd.Series) -> Sample: """ - Load the specified split from the dataset library for summarization task. + Convert a row from the dataset into a Sample for summarization. Args: - dataset (pd.DataFrame): - The input dataset containing the document data and corresponding summaries. - feature_column (str, optional): - Name of the column in the dataset containing the input document data. - Default is "document". - target_column (str, optional): - Name of the column in the dataset containing the target summaries for summarization. - Default is "summary". + data_row (Dict[str, str]): + Single row of the dataset. + + Returns: + Sample: + Row formatted into a Sample object for summarization. 
+ """ + if type(self._file_path) == dict: + original = row.loc["document"] + summary = row.loc["summary"] + else: + original = row[self.column_map["text"]] + summary = row[self.column_map["summary"]] + + return SummarizationSample( + original=original, expected_results=summary, task="summarization" + ) + + def _row_to_sample_question_answering(self, row: pd.Series) -> QASample: + """ + Convert a row from the dataset into a QASample for question-answering. + + Args: + row (pd.Series): + Single row of the dataset. + + Returns: + QASample: + Row formatted into a QASample object for question-answering. + """ + + if type(self._file_path) == dict: + question = row.loc["question"] + passage = row.loc["passage"] + else: + question = row[self.column_map["text"]] + passage = row[self.column_map["context"]] + + return QASample( + original_question=question, + original_context=passage, + task="question-answering", + ) + + def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: + """Helper function to map original column into standardized ones. + + Args: + column_names (List[str]): + list of column names of the csv file + + Returns: + Dict[str, str]: + mapping from the original column names into 'standardized' names + """ + column_map = {k: None for k in self.COLUMN_NAMES} + for c in column_names: + for key, reference_columns in self.COLUMN_NAMES.items(): + if c.lower() in reference_columns: + column_map[key] = c + + not_referenced_columns = { + k: self.COLUMN_NAMES[k] for k, v in column_map.items() if v is None + } + if "text" in not_referenced_columns and ( + "ner" in not_referenced_columns or "label" in not_referenced_columns + ): + raise OSError( + f"CSV file is invalid. 
CSV handler works with template column names!\n" + f"{', '.join(not_referenced_columns.keys())} column could not be found in header.\n" + f"You can use following namespaces:\n{not_referenced_columns}" + ) + return column_map + + def _import_data(self, file_name, **kwargs) -> List[Sample]: + """Helper function to import testcases from csv file after editing. + + Args: + file_name (str): path to the csv file + **kwargs: additional arguments to pass to pandas.read_csv + + Returns: + List[Sample]: list of samples + """ + data = pd.read_csv(file_name, **kwargs) + custom_names = { + "question-answering": "qa", + "text-classification": "sequenceclassification", + } + sample_models = { + k.lower(): v for k, v in sample.__dict__.items() if k.endswith("Sample") + } + samples = [] + + for i in data.to_dict(orient="records"): + if self.task in custom_names: + sample_name = custom_names[self.task] + "sample" + else: + sample_name = self.task.lower() + "sample" + samples.append(sample_models[sample_name](**i)) + return samples + + +class JSONDataset(_IDataset): + """Class to handle JSON dataset files. Subclass of _IDataset.""" + + def __init__(self, file_path: str): + """Initializes JSONDataset object. + + Args: + file_path (str): Path to the data file. + """ + super().__init__() + self._file_path = file_path + + def load_raw_data(self): + """Loads data into a raw list""" + raise NotImplementedError() + + def load_data(self) -> List[Sample]: + """Loads data into a list of Sample + + Returns: + List[Sample]: formatted samples + """ + raise NotImplementedError() + + def export_data(self, data: List[Sample], output_path: str): + """Exports the data to the corresponding format and saves it to 'output_path'. + + Args: + data (List[Sample]): + data to export + output_path (str): + path to save the data to + """ + raise NotImplementedError() + + +class JSONLDataset(_IDataset): + """Class to handle JSONL datasets. 
Subclass of _IDataset.""" + + supported_tasks = [ + "ner", + "text-classification", + "question-answering", + "summarization", + "toxicity", + "translation", + ] + COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} + + def __init__(self, file_path: str, task: str) -> None: + """Initializes JSONLDataset object. + + Args: + file_path (str): Path to the data file. + task (str): name of the task to perform + """ + super().__init__() + self._file_path = file_path + self.task = task + self.column_matcher = None + + def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: + """Helper function to map original column into standardized ones. + + Args: + column_names (List[str]): + list of column names of the csv file + + Returns: + Dict[str, str]: + mapping from the original column names into 'standardized' names + """ + column_map = {} + for column in column_names: + for key, reference_columns in self.COLUMN_NAMES[self.task].items(): + if column.lower() in reference_columns: + column_map[key] = column + + not_referenced_columns = [ + col for col in self.COLUMN_NAMES[self.task] if col not in column_map + ] + + if "text" in not_referenced_columns: + raise OSError( + f"Your dataset needs to have at least have a column with one of the following name: " + f"{self.COLUMN_NAMES[self.task]['text']}, found: {column_names}." + ) + + for missing_col in not_referenced_columns: + column_map[missing_col] = None + return column_map + + def load_raw_data(self) -> List[Dict]: + """Loads data from a JSON file into a list""" + with jsonlines.open(self._file_path) as reader: + data = [obj for obj in reader] + return data + + def load_data(self) -> List[Sample]: + """Loads data from a JSONL file and format it into a list of Sample. + + Returns: + list[Sample]: Loaded text data. 
+ """ + data = [] + with jsonlines.open(self._file_path) as reader: + for item in reader: + if self.column_matcher is None: + self.column_matcher = self._match_column_names(item.keys()) + + if self.task == "question-answering": + expected_results = item.get(self.column_matcher["answer"]) + if isinstance(expected_results, str) or isinstance( + expected_results, bool + ): + expected_results = [str(expected_results)] + + data.append( + QASample( + original_question=item[self.column_matcher["text"]], + original_context=item.get( + self.column_matcher["context"], "-" + ), + expected_results=expected_results, + task=self.task, + dataset_name=self._file_path.split("/")[-2], + ) + ) + + elif self.task == "summarization": + expected_results = item.get(self.column_matcher["summary"]) + if isinstance(expected_results, str) or isinstance( + expected_results, bool + ): + expected_results = [str(expected_results)] + data.append( + SummarizationSample( + original=item[self.column_matcher["text"]], + expected_results=expected_results, + task=self.task, + dataset_name=self._file_path.split("/")[-2], + ) + ) + elif self.task == "toxicity": + data.append( + ToxicitySample( + prompt=item[self.column_matcher["text"]], + task=self.task, + dataset_name=self._file_path.split("/")[-2], + ) + ) + + elif self.task == "translation": + data.append( + TranslationSample( + original=item[self.column_matcher["text"]], + task=self.task, + dataset_name=self._file_path.split("/")[-2], + ) + ) + + return data + + def export_data(self, data: List[Sample], output_path: str): + """Exports the data to the corresponding format and saves it to 'output_path'. 
+ + Args: + data (List[Sample]): + data to export + output_path (str): + path to save the data to + """ + raise NotImplementedError() + + +class HuggingFaceDataset(_IDataset): + """Example dataset class that loads data using the Hugging Face dataset library.""" + + supported_tasks = ["text-classification", "summarization"] + + LIB_NAME = "datasets" + COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks} + + def __init__(self, dataset_name: str, task: str): + """Initialize the HuggingFaceDataset class. + + Args: + dataset_name (str): + Name of the dataset to load. + task (str): + Task to be evaluated on. + """ + self.dataset_name = dataset_name + self.task = task + self._check_datasets_package() + + def _check_datasets_package(self): + """Check if the 'datasets' package is installed and import the load_dataset function. + + Raises an error if the package is not found. + """ + if try_import_lib(self.LIB_NAME): + dataset_module = importlib.import_module(self.LIB_NAME) + self.load_dataset = getattr(dataset_module, "load_dataset") + else: + raise ModuleNotFoundError( + f"The '{self.LIB_NAME}' package is not installed. Please install it using 'pip install {self.LIB_NAME}'." + ) + + def load_data_classification( + self, + feature_column: str = "text", + target_column: str = "label", + split: str = "test", + subset: str = None, + ) -> List[Sample]: + """Load the specified split from the dataset library. + + Args: + feature_column (str): + Name of the feature_column column. + target_column (str): + Name of the target_column column. + split (str): + Name of the split to load (e.g., train, validation, test). + subset (str): + Name of the configuration. Returns: List[Sample]: - Loaded split as a list of Sample objects for summarization task, where each - Sample object contains a document and its corresponding summary. + Loaded split as a list of Sample objects. 
""" - feature_column = self._file_path.get("feature_column", "document") - target_column = self._file_path.get("target_column", "summary") + if subset: + dataset = self.load_dataset(self.dataset_name, name=subset, split=split) + else: + dataset = self.load_dataset(self.dataset_name, split=split) - dataset.rename( - columns={feature_column: "document", target_column: "summary"}, inplace=True - ) + if feature_column and target_column: + dataset = dataset.map( + lambda example: { + "text": example[feature_column], + "label": example[target_column], + } + ) - samples = [ - self._row_to_sample_summarization(row) for _, row in dataset.iterrows() - ] + samples = [self._row_to_sample_classification(example) for example in dataset] return samples - def load_data_question_answering( + def load_data_summarization( self, - dataset: pd.DataFrame, + feature_column: str = "document", + target_column: str = "summary", + split: str = "test", + subset: str = None, ) -> List[Sample]: - """ - Load the specified split from the dataset library for question-answering task. + """Load the specified split from the dataset library for summarization task. Args: - dataset (pd.DataFrame): - The input dataset containing the passage, question, and corresponding answers. - feature_column (dict, optional): - Dictionary of column names in the dataset containing the input passage and question data. - Default is {"passage": "passage", "question": "question"}. - target_column (str, optional): - Name of the column in the dataset containing the target answers for question-answering. - Default is "answer". + feature_column (str): + Name of the column containing the input text or document. + target_column (str): + Name of the column containing the target summary. + split (str): + Name of the split to load (e.g., train, validation, test). + subset (str): + Name of the configuration or subset to load. 
Returns: - List[QASample]: - Loaded split as a list of QASample objects for question-answering task, where each - QASample object contains an original question, original context (passage), and the task name. + List[Sample]: + Loaded split as a list of Sample objects for summarization task. """ - feature_column = self._file_path.get( - "feature_column", {"passage": "passage", "question": "question"} - ) - target_column = self._file_path.get("target_column", "answer") - - passage_column = feature_column.get("passage") - question_column = feature_column.get("question") - - if passage_column in dataset.columns: - dataset.rename(columns={passage_column: "passage"}, inplace=True) + if subset: + dataset = self.load_dataset(self.dataset_name, name=subset, split=split) else: - dataset["passage"] = "-" - - if question_column in dataset.columns: - dataset.rename(columns={question_column: "question"}, inplace=True) + dataset = self.load_dataset(self.dataset_name, split=split) - dataset.rename(columns={target_column: "answer"}, inplace=True) + if feature_column and target_column: + dataset = dataset.map( + lambda example: { + "document": example[feature_column], + "summary": example[target_column], + } + ) - samples = [ - self._row_to_sample_question_answering(row) for _, row in dataset.iterrows() - ] + samples = [self._row_to_sample_summarization(example) for example in dataset] return samples - def _row_to_sample_question_answering(self, row: pd.Series) -> QASample: - """ - Convert a row from the dataset into a QASample for question-answering. - - Args: - row (pd.Series): - Single row of the dataset. - - Returns: - QASample: - Row formatted into a QASample object for question-answering. 
- """ - question = row.loc["question"] - passage = row.loc["passage"] + def load_raw_data( + self, + split: str = "test", + subset: str = None, + ) -> List: + """Loads data into a list""" + if subset: + dataset = self.load_dataset(self.dataset_name, name=subset, split=split) + else: + dataset = self.load_dataset(self.dataset_name, split=split) - return QASample( - original_question=question, - original_context=passage, - task="question-answering", - ) + return dataset.to_list() - def _row_to_seq_classification_sample(self, row: pd.Series) -> Sample: - """ - Convert a row from the dataset into a Sample for the text-classification task + def load_data( + self, + feature_column: str = "text", + target_column: str = "label", + split: str = "test", + subset: str = None, + ) -> List[Sample]: + """Load the specified data based on the task. Args: - row (pd.Series): - Single row of the dataset as a Pandas Series + feature_column (str): + Name of the column containing the input text or document. + target_column (str): + Name of the column containing the target label or summary. + split (str): + Name of the split to load (e.g., train, validation, test). + subset (str): + Name of the configuration or subset to load. Returns: - Sample: - Row formatted into a Sample object + List[Sample]: + Loaded data as a list of Sample objects. + + Raises: + ValueError: + If an unsupported task is provided. 
""" - if type(self._file_path) == dict: - original = row.loc["text"] - label = SequenceLabel(label=row.loc["label"], score=1) + if self.task == "text-classification": + return self.load_data_classification( + feature_column, target_column, split, subset + ) + elif self.task == "summarization": + return self.load_data_summarization( + feature_column, target_column, split, subset + ) else: - original = row[self.column_map["text"]] - # label score should be 1 since it is ground truth, required for __eq__ - label = SequenceLabel(label=row[self.column_map["label"]], score=1) - - return SequenceClassificationSample( - original=original, - expected_results=SequenceClassificationOutput(predictions=[label]), - ) + raise ValueError(f"Unsupported task: {self.task}") - def _row_to_sample_summarization(self, row: pd.Series) -> Sample: - """ - Convert a row from the dataset into a Sample for summarization. + @staticmethod + def _row_to_sample_summarization(data_row: Dict[str, str]) -> Sample: + """Convert a row from the dataset into a Sample for summarization. Args: data_row (Dict[str, str]): @@ -1204,83 +1274,62 @@ def _row_to_sample_summarization(self, row: pd.Series) -> Sample: Sample: Row formatted into a Sample object for summarization. """ - original = row.loc["document"] - summary = row.loc["summary"] + original = data_row.get("document", "") + summary = data_row.get("summary", "") return SummarizationSample( original=original, expected_results=summary, task="summarization" ) - @staticmethod - def _find_delimiter(file_path: str) -> property: - """ - Helper function in charge of finding the delimiter character in a csv file. 
- Args: - file_path (str): - location of the csv file to load - Returns: - property: - """ - sniffer = csv.Sniffer() - with open(file_path, encoding="utf-8") as fp: - first_line = fp.readline() - delimiter = sniffer.sniff(first_line).delimiter - return delimiter - - def _match_column_names(self, column_names: List[str]) -> Dict[str, str]: - """Helper function to map original column into standardized ones. + def export_data(self, data: List[Sample], output_path: str): + """Exports the data to the corresponding format and saves it to 'output_path'. Args: - column_names (List[str]): - list of column names of the csv file - - Returns: - Dict[str, str]: - mapping from the original column names into 'standardized' names + data (List[Sample]): + Data to export. + output_path (str): + Path to save the data to. """ - column_map = {k: None for k in self.COLUMN_NAMES} - for c in column_names: - for key, reference_columns in self.COLUMN_NAMES.items(): - if c.lower() in reference_columns: - column_map[key] = c + rows = [] + for s in data: + row = Formatter.process(s, output_format="csv") + rows.append(row) - not_referenced_columns = { - k: self.COLUMN_NAMES[k] for k, v in column_map.items() if v is None - } - if "text" in not_referenced_columns and ( - "ner" in not_referenced_columns or "label" in not_referenced_columns - ): - raise OSError( - f"CSV file is invalid. CSV handler works with template column names!\n" - f"{', '.join(not_referenced_columns.keys())} column could not be found in header.\n" - f"You can use following namespaces:\n{not_referenced_columns}" - ) - return column_map + df = pd.DataFrame(rows, columns=list(self.COLUMN_NAMES[self.task].keys())) + df.to_csv(output_path, index=False, encoding="utf-8") - def _import_data(self, file_name, **kwargs) -> List[Sample]: - """Helper function to import testcases from csv file after editing. 
+ def _row_to_sample_classification(self, data_row: Dict[str, str]) -> Sample: + """Convert a row from the dataset into a Sample for text classification. Args: - file_name (str): path to the csv file - **kwargs: additional arguments to pass to pandas.read_csv + data_row (Dict[str, str]): + Single row of the dataset. Returns: - List[Sample]: list of samples + Sample: + Row formatted into a Sample object. """ - data = pd.read_csv(file_name, **kwargs) - custom_names = { - "question-answering": "qa", - "text-classification": "sequenceclassification", - } - sample_models = { - k.lower(): v for k, v in sample.__dict__.items() if k.endswith("Sample") - } - samples = [] + input_column = next( + ( + col + for col in self.COLUMN_NAMES["text-classification"]["text"] + if col in data_row + ), + None, + ) + output_column = next( + ( + col + for col in self.COLUMN_NAMES["text-classification"]["label"] + if col in data_row + ), + None, + ) - for i in data.to_dict(orient="records"): - if self.task in custom_names: - sample_name = custom_names[self.task] + "sample" - else: - sample_name = self.task.lower() + "sample" - samples.append(sample_models[sample_name](**i)) - return samples + original = data_row.get(input_column, "") + label = SequenceLabel(label=data_row.get(output_column, ""), score=1) + + return SequenceClassificationSample( + original=original, + expected_results=SequenceClassificationOutput(predictions=[label]), + ) From 49a3ced3e08693e5a163dcaed0c784bd9befe798 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 30 Jul 2023 05:09:41 +0530 Subject: [PATCH 15/24] re-arranged classes --- langtest/datahandler/datasource.py | 72 +++++++++++++++--------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index ace5a91e8..e708f664b 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -392,6 +392,42 @@ def export_data(self, data: List[Sample], 
output_path: str): fwriter.write(bytes(otext, encoding="utf-8")) +class JSONDataset(_IDataset): + """Class to handle JSON dataset files. Subclass of _IDataset.""" + + def __init__(self, file_path: str): + """Initializes JSONDataset object. + + Args: + file_path (str): Path to the data file. + """ + super().__init__() + self._file_path = file_path + + def load_raw_data(self): + """Loads data into a raw list""" + raise NotImplementedError() + + def load_data(self) -> List[Sample]: + """Loads data into a list of Sample + + Returns: + List[Sample]: formatted samples + """ + raise NotImplementedError() + + def export_data(self, data: List[Sample], output_path: str): + """Exports the data to the corresponding format and saves it to 'output_path'. + + Args: + data (List[Sample]): + data to export + output_path (str): + path to save the data to + """ + raise NotImplementedError() + + class CSVDataset(_IDataset): supported_tasks = [ "ner", @@ -924,42 +960,6 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: return samples -class JSONDataset(_IDataset): - """Class to handle JSON dataset files. Subclass of _IDataset.""" - - def __init__(self, file_path: str): - """Initializes JSONDataset object. - - Args: - file_path (str): Path to the data file. - """ - super().__init__() - self._file_path = file_path - - def load_raw_data(self): - """Loads data into a raw list""" - raise NotImplementedError() - - def load_data(self) -> List[Sample]: - """Loads data into a list of Sample - - Returns: - List[Sample]: formatted samples - """ - raise NotImplementedError() - - def export_data(self, data: List[Sample], output_path: str): - """Exports the data to the corresponding format and saves it to 'output_path'. - - Args: - data (List[Sample]): - data to export - output_path (str): - path to save the data to - """ - raise NotImplementedError() - - class JSONLDataset(_IDataset): """Class to handle JSONL datasets. 
Subclass of _IDataset.""" From f82ac20548f8033a385131924c75b426510a2c15 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 30 Jul 2023 20:10:39 +0530 Subject: [PATCH 16/24] added some checks and support for custom columns ner --- langtest/datahandler/datasource.py | 78 ++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index e708f664b..e8c82263a 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -648,6 +648,24 @@ def load_data_ner( List[Sample]: Preprocessed data samples for NER task. """ + + if type(self._file_path) == dict: + feature_column = self._file_path.get("feature_column", "text") + target_column = self._file_path.get("target_column", "ner") + + if ( + feature_column not in dataset.columns + or target_column not in dataset.columns + ): + raise ValueError( + f"Columns '{feature_column}' and '{target_column}' not found in the dataset." + ) + + dataset.rename( + columns={feature_column: "text", target_column: "ner"}, + inplace=True, + ) + samples = [] for row_index, row in dataset.iterrows(): samples.append(self._row_to_ner_sample(row.to_dict(), row_index)) @@ -677,8 +695,16 @@ def load_data_classification( of an input text and its corresponding label. """ if type(self._file_path) == dict: - feature_column = (self._file_path.get("feature_column", "text"),) - target_column = (self._file_path.get("target_column", "label"),) + feature_column = self._file_path.get("feature_column", "text") + target_column = self._file_path.get("target_column", "label") + + if ( + feature_column not in dataset.columns + or target_column not in dataset.columns + ): + raise ValueError( + f"Columns '{feature_column}' and '{target_column}' not found in the dataset." 
+ ) if feature_column and target_column: dataset.rename( @@ -716,6 +742,14 @@ def load_data_summarization( feature_column = self._file_path.get("feature_column", "document") target_column = self._file_path.get("target_column", "summary") + if ( + feature_column not in dataset.columns + or target_column not in dataset.columns + ): + raise ValueError( + f"Columns '{feature_column}' and '{target_column}' not found in the dataset." + ) + dataset.rename( columns={feature_column: "document", target_column: "summary"}, inplace=True, @@ -754,10 +788,27 @@ def load_data_question_answering( ) target_column = self._file_path.get("target_column", "answer") - passage_column = feature_column.get("passage") + passage_column = feature_column.get("passage", None) question_column = feature_column.get("question") + dataset_columns = set(dataset.columns) + if ( + "question" not in feature_column + or feature_column["question"] not in dataset_columns + ): + raise ValueError( + f"'feature_column' '{feature_column['question']}' not found in the dataset." + ) + if "answer" not in target_column or target_column not in dataset_columns: + raise ValueError( + f"'target_column' '{target_column}' not found in the dataset." + ) + if passage_column in dataset.columns: + if passage_column not in dataset_columns: + raise ValueError( + f"'feature_column' '{passage_column}' not found in the dataset." 
+ ) dataset.rename(columns={passage_column: "passage"}, inplace=True) else: dataset["passage"] = "-" @@ -786,7 +837,16 @@ def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Samp """ - text_col = self.column_map["text"] + if type(self._file_path) == dict: + text_col = "text" + ner_col = "ner" + pos_col = "pos" + chunk_col = "chunk" + else: + text_col = self.column_map["text"] + ner_col = self.column_map["ner"] + pos_col = self.column_map["text"] + chunk_col = self.column_map["text"] for key, value in row.items(): if isinstance(value, str): @@ -809,15 +869,13 @@ def _row_to_ner_sample(self, row: Dict[str, List[str]], sent_index: int) -> Samp token = row[text_col][token_indx] ner_labels.append( NERPrediction.from_span( - entity=row[self.column_map["ner"]][token_indx], + entity=row[ner_col][token_indx], word=token, start=cursor, end=cursor + len(token), - pos_tag=row[self.column_map["pos"]][token_indx] - if row.get(self.column_map["pos"], None) - else None, - chunk_tag=row[self.column_map["chunk"]][token_indx] - if row.get(self.column_map["chunk"], None) + pos_tag=row[pos_col][token_indx] if row.get(pos_col, None) else None, + chunk_tag=row[chunk_col][token_indx] + if row.get(chunk_col, None) else None, ) ) From c1b0d8fedc145c784e0ca225afbf671b8dcaffc5 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 30 Jul 2023 20:11:09 +0530 Subject: [PATCH 17/24] Test(test/test_harness.py): added some test --- tests/test_harness.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index 4a1faf932..2f7dbf039 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -224,7 +224,7 @@ def test_harness_edit_import_testcases(self): def test_text_classification_csv_custom_columns(self): """Test loading CSV data with custom column names for text classification.""" - save_dir = "/tmp/saved_HF_data_text_classification_harness_test" + save_dir = 
"/tmp/saved_csv_data_text_classification_harness_test" tc_harness = Harness( task="text-classification", hub="huggingface", @@ -249,6 +249,33 @@ def test_text_classification_csv_custom_columns(self): self.assertEqual(tc_harness.data, loaded_tc_harness.data) self.assertNotEqual(tc_harness.model, loaded_tc_harness.model) + def test_ner_csv_custom_columns(self): + """Test loading CSV data with custom column names for text classification.""" + save_dir = "/tmp/saved_csv_data_text_classification_harness_test" + tc_harness = Harness( + task="ner", + hub="huggingface", + model="dslim/bert-base-NER", + data={ + "name": r"D:\full_stack\repo\1.2.0\csv\langtest\tests\fixtures\tner.csv", + "feature_column": "tokens", + "target_column": "ner_tags", + }, + ) + tc_harness.data = tc_harness.data[:10] + tc_harness.generate() + tc_harness.save(save_dir) + + loaded_tc_harness = Harness.load( + save_dir=save_dir, + task="ner", + model="dslim/bert-base-NER", + hub="huggingface", + ) + self.assertEqual(tc_harness._config, loaded_tc_harness._config) + self.assertEqual(tc_harness.data, loaded_tc_harness.data) + self.assertNotEqual(tc_harness.model, loaded_tc_harness.model) + class DefaultCodeBlocksTestCase(unittest.TestCase): """ From 55ce03e62dbe213444290e3c39f194dfd4ab7ecf Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Sun, 30 Jul 2023 21:14:58 +0530 Subject: [PATCH 18/24] file path updated --- tests/test_harness.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_harness.py b/tests/test_harness.py index 2f7dbf039..0f613533e 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -251,13 +251,13 @@ def test_text_classification_csv_custom_columns(self): def test_ner_csv_custom_columns(self): """Test loading CSV data with custom column names for text classification.""" - save_dir = "/tmp/saved_csv_data_text_classification_harness_test" + save_dir = "/tmp/saved_csv_data_ner_harness_test" tc_harness = Harness( task="ner", hub="huggingface", 
model="dslim/bert-base-NER", data={ - "name": r"D:\full_stack\repo\1.2.0\csv\langtest\tests\fixtures\tner.csv", + "name": "tests/fixtures/tner.csv", "feature_column": "tokens", "target_column": "ner_tags", }, From aeefe009743fa10668aa84b5747ea12752e1bab1 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 23:43:17 +0530 Subject: [PATCH 19/24] updated test_harness.py and datasource.py --- langtest/datahandler/datasource.py | 9 +++++---- tests/test_harness.py | 10 ++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index c9d1b6ebf..c738d9372 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -117,6 +117,7 @@ def __init__(self, file_path: dict, task: str, **kwargs) -> None: raise ValueError( "The 'data_source' key must be provided in the 'file_path' dictionary." ) + self._custom_label = file_path self._file_path = file_path.get("data_source") self._class_map = { cls.__name__.replace("Dataset", "").lower(): cls @@ -145,7 +146,7 @@ def load(self) -> List[Sample]: Returns: list[Sample]: Loaded text data. """ - if isinstance(self._custom_label, dict): + if len(self._custom_label) >1 and self.file_ext== "csv": self.init_cls = self._class_map[self.file_ext.replace(".", "")]( self._custom_label, task=self.task, **self.kwargs ) @@ -478,7 +479,7 @@ def __init__(self, file_path: Union[str, Dict], task: str, **kwargs) -> None: Args: file_path (Union[str, Dict]): - The path to the data file or a dictionary containing "name" key with the path + The path to the data file or a dictionary containing "data_source" key with the path task (str): Specifies the task of the dataset, which can be either "text-classification","ner" "question-answering" and "summarization". 
@@ -489,7 +490,7 @@ def __init__(self, file_path: Union[str, Dict], task: str, **kwargs) -> None: self._file_path = file_path self.task = task if type(file_path) == dict: - self.delimiter = self._find_delimiter(file_path["name"]) + self.delimiter = self._find_delimiter(file_path["data_source"]) else: if task in self.COLUMN_NAMES: self.COLUMN_NAMES = self.COLUMN_NAMES[self.task] @@ -582,7 +583,7 @@ def load_data(self) -> List[Sample]: return self._import_data(self._file_path, **kwargs) if type(self._file_path) == dict: - dataset = pd.read_csv(self._file_path["name"]) + dataset = pd.read_csv(self._file_path["data_source"]) else: dataset = pd.read_csv(self._file_path) if not self.column_map: diff --git a/tests/test_harness.py b/tests/test_harness.py index b728ba379..cdf8f5c42 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -221,10 +221,9 @@ def test_text_classification_csv_custom_columns(self): save_dir = "/tmp/saved_csv_data_text_classification_harness_test" tc_harness = Harness( task="text-classification", - hub="huggingface", - model="lvwerra/distilbert-imdb", + model={"model":"lvwerra/distilbert-imdb", "hub":"huggingface"}, data={ - "name": "tests/fixtures/text_classification.csv", + "data_source": "tests/fixtures/text_classification.csv", "feature_column": "text", "target_column": "label", }, @@ -248,10 +247,9 @@ def test_ner_csv_custom_columns(self): save_dir = "/tmp/saved_csv_data_ner_harness_test" tc_harness = Harness( task="ner", - hub="huggingface", - model="dslim/bert-base-NER", + model={"model":"dslim/bert-base-NER", "hub":"huggingface"}, data={ - "name": "tests/fixtures/tner.csv", + "data_source": "tests/fixtures/tner.csv", "feature_column": "tokens", "target_column": "ner_tags", }, From b4579117cbc8e57586764bfa771ff9ac1366b490 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Thu, 17 Aug 2023 23:49:32 +0530 Subject: [PATCH 20/24] fix lint --- langtest/datahandler/datasource.py | 2 +- tests/test_harness.py | 4 ++-- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index c738d9372..6b52e4c58 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -146,7 +146,7 @@ def load(self) -> List[Sample]: Returns: list[Sample]: Loaded text data. """ - if len(self._custom_label) >1 and self.file_ext== "csv": + if len(self._custom_label) > 1 and self.file_ext == "csv": self.init_cls = self._class_map[self.file_ext.replace(".", "")]( self._custom_label, task=self.task, **self.kwargs ) diff --git a/tests/test_harness.py b/tests/test_harness.py index cdf8f5c42..3dc1b12e3 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -221,7 +221,7 @@ def test_text_classification_csv_custom_columns(self): save_dir = "/tmp/saved_csv_data_text_classification_harness_test" tc_harness = Harness( task="text-classification", - model={"model":"lvwerra/distilbert-imdb", "hub":"huggingface"}, + model={"model": "lvwerra/distilbert-imdb", "hub": "huggingface"}, data={ "data_source": "tests/fixtures/text_classification.csv", "feature_column": "text", @@ -247,7 +247,7 @@ def test_ner_csv_custom_columns(self): save_dir = "/tmp/saved_csv_data_ner_harness_test" tc_harness = Harness( task="ner", - model={"model":"dslim/bert-base-NER", "hub":"huggingface"}, + model={"model": "dslim/bert-base-NER", "hub": "huggingface"}, data={ "data_source": "tests/fixtures/tner.csv", "feature_column": "tokens", From 299eafb9add918526589fb8dde17e8ca53a78c15 Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 18 Aug 2023 18:06:28 +0530 Subject: [PATCH 21/24] added Loading_Data_with_Custom_Columns notebook --- .../Loading_Data_with_Custom_Columns.ipynb | 7435 +++++++++++++++++ 1 file changed, 7435 insertions(+) create mode 100644 demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb diff --git a/demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb b/demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb new 
file mode 100644 index 000000000..08924c910 --- /dev/null +++ b/demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb @@ -0,0 +1,7435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAUgAAABcCAYAAAAMJCwKAAAgAElEQVR4nOy9f5gcZ3Xn+znnra5pjcfKZCyNfqDIQgghZMdxZMfGxpbbwhjM2g4h2Ak/Nol3Aw5xEsLu5eHh8vCofNl9uFluLhiwhUi4zib3ZomcZBMgARsjt4RxbGIritcSsiyE0GpleSQLMYxHPd1V59w/qnq6Z6ZnNJJG/Ej6+zw9PW911fueeqvq1Pn9CucASZJokkzZaudirC666KKLcwWZ+y4TveyWJeW4/lKZYYD5mI2m8+YdH61Wk3Tux+uiiy66ODeYYwaZaKUysNSI7xSVtfj4MCPi9t8WLhzY+sADt9fndswuuuiii3ODaO66ShQSM7lvvYj8B6A8/pMIiM4/evToTuDI3I3ZRRdddHHuMIcMMocgC9ysFwx3DBzVyFzCQBpF8VyP10UXXXRxrjDnDBJygdFyl4wiTS3egJPnYrguuuiii3MCPRedem57NHBk3A6pwLxzMVwXXXTRxTnBnEmQSZJ/xP2gaDjhrv00vTSigB12tVqSJNrcf/p+uiFBXXTRxY8ec+7Fvuqq+f1RT/ktgl40PogwbKn/XQgv7KhUsJwBJjNIr10G2UUXXfzocU7iICsV9AfnL4k5nG85//zYKpXv1pMksStv+uT8eKy0RtyWqU9U8U1cU5e9Mb17qtU7anNPWxdddNHF7HEOGOTUTJpKBa1UsC271kYLjh79zyL6bnefP3F4b5JzxLEPvrhw4Z/v7sZMdtFFFz9CnBMGORW5On1V5YLVsUT/CNJrlnXcUzXg+JfU7c5K5ehQ1x7ZRRdd/KhwTsJ8JqMpTW7dzlJc+swykBZ3HpcdAfcMkVAGLVerKHl8UBdddNHFDx3nJMxn2sHMFYrEmrbtPyQxtosuuujitPBDlSDXbwgqDo4grUTtCRJkF1100cWPC+aIQc4uZMdMLAhtzDH/lo7KdhdddNHFjxZzwCATXbuWCNZO8/sWBgdfUvhuCh75hN8mM8P2djfKp4suuvjR4iwYZKLXvq7/YrGeD7jbIBxF3NskyZZ/JTc9LkyBBdP5XNxBwETV8OwwcKJSwarVM6ewiy666OJscEb6bJIkWq0uXOkS/ptqaZ1ZSqsoxQxwU/f28J7Jxzil6LwnG/aDD2zf+rtbz4S2Lrrooou5whlLkCa+LmjP8ix9KXUkEloWxBm+TaTwnDsmok+L6iHcIxcxaBzP0h98bnvlxe1szetLnu0JdtFFF12cKc6YQbprjLgiolKECzXlwVN9Fz2kmdumyPyhNLhGmRhEI9XqnceongFzLIpg0A0s76KLLuYILQaZJAobIZFZMphsgnQ4W7g7ICaAqp2oXHfs4K5dREePthsnZ2BySdPOWS2+K5bTvLG5rcsgu+iiizlBziCTRyIWDpY5ursO5PnPic8QunM3ofgvZ46T2eSp2tB04iRJYkmSpDOmFCau44x77e6II3GZ0s+U0bEyvq+PTc/2Ic8tw5fGJL5l9ky+iy666GJ65AxyydJVuN7OYh/lM88OIQwjz42QygjKMJ6OYlajhzqhd5Q7qFPJO/Ai7Lv5fx7VOHO7CfdZZPJsPtwLe9fxmb2D4H286IuJWYTqAvS8BbgsRmwAGCTL9gFb5mhuuuiii3/lyBlkqsuZN+8OsvogIaqhOgqhRikbJUtHca2TpaM0pE5afzBJNn5m/bb7VGkP8p74/3TtcSapBhODIjvDv
j9I+fy7kbCGtF7GrBfPYtwUc8vXd3AIEdC5AEYXXXTRxZkgZ5Alt9yg6BH1sX5gfsHbNOdnriBQ7jVOvpRWqH72rHVYY3bGSytFNBqLkXSQrFFInN70hBffbmiYZYdddNFFF7NDIUECJcgZjytNxtiEA7iRpYqQTu2mubPMsi2AIGKz5LMCmOKmHeMtu3yxiy66OAeI2v6eIthbirVlRGGyq3imlMHJ7bbM60ICzMuatSrsTlmXRrFZqeNddNFFF3OIXEXtIBNOz5CauvfZQ0TqANXqRH47qyK5XYbZRRddnGNMlCDbMUWY7MyR2r3Ys4XjiKC4r61UPnMQsrJpi0lm+olDpfTE4Wo16cS6p6Gviy666GJuMZE1+mTD4/RcyFWsGcRzOpCWAKogHzGyjwATdPbg8QF06d2Vyv2fn75WRbc0WhdddHFuMclJAy3GM7lG4xSHSwp5QLa7W3uwT4t1easHkem1cqHVrWMi0XIXeY9Qa/LHtmOno+cnH801wydt6wa9d9HFjwgdVOxTOVya8N2W1YdE4wXi2YxH5BFERidm5u75/sVPDmAZIEsta/QC9YnHdex9GhrPHJ2YVbH9HDCsRG+6aaCvWg29k3+pVDanlcrzx//lMMr2eW2d08SVMP+lnOuPEdoz485Vptnk7LvTHSdxhbvJ04anw91nXm+hSV87XaeYl4kqdrsXe4oGOy7iWZWKVbJtu2HwfZlnG8VZPC1RCuLgbgMg/ePVfMaHLAZpfakI5gBxTOvHSUzwHGrY0zHHczXWU08tKZ8YyX4f918uwt5VwAwipfF0tbrkvUmS/EQzyZwBJkYClSo6NFRELly0FtjNll1Q1P+05vz/JJ9vF2eARGxqrYV2VIqaC8nE9ONT9lvUmWj2u2VXG9/bDbuHLO+bKf1Ob4OcUqpxIiOrVLAk+e2HIdl62WVLykuXTkfd8wCcGB78UAjRfzCrRyAzVBGapTR4jpjjbbdtiavVY+sybIUIRhaADIJHiB4DHprrMYeGxqK4HF6uIbrYLVMpXgiRBixr1EulenzKTn5skWilglarS/qvrty7LFTlNSby6gWLfJkg/Rw7rrB4FOG4kR1av97/6aGq7CXWw5VKcnxGR10Xs8Omb61A9l0OGXhQPv2tnfzOq/fOWf/JIxFLll2CPbsq3yCK6yj3f2c7d7z8xCmP37Ir5lhpGZEuxp5dCroAedl8JJQR78ElxTmJ7x0G389nnjuI7B0i8eP5+DMwysSVnzown/i5FaitI7rwSk74UpA+xFPcj7P0woPw3C42P/c0YfcBEj/R7HN6RuU+KS6yybgKKRVyzpwk9tRTjD711LQUKsC111nqba6Yyd7vZnvWPvEp9J09KpUkOjR8qC/WeXeKh7fnGToOLghR5GZPcg4Y5Lx5wTL31C2z3BSRM0jLR09H53rAHwKaUmC1urA3w25Q4ZYS4Ro3WyUiKqJ4YcMW0DyyIeBqtZLqARq+AwY/BTz+Iz2Rn2Q0JSd/7mpCuAejTKlkYB8C5oZBJolywZJBotIHSeVW8BSIEB2hkd4BfKHJJzof78rRby9nXvmjZI31CPNxi0GLpBAthCEDF0PCMCE6hNsOFu39Mg39exIfmZZJLn52HRq/DS29kbSxGhFFFEQUHBzDHUxSotJBTP+SZbs/1mSSE+MgRVpSZJP5TG5PqEp2ahWoZVcquivY38QCFq32KVleJ/rm0ATZM3aeQkCQCCd2J3aIEVVkJsn37CCtOyEPgZrgiPrJxBe/uKScuX44aM/HwX8NfBU47hlmDSyr5x+r45ZinoEQ46zGeKuJLYcfrsnjXxaaaqUoqhEiMVEMOoPD9ExQ0lVIuJjcfFYGIkLUj+hNwKn5hKS9qCwDGaD5rIWIfBGWDDzL81OiHiWEftzW4PZOeno/TmQbedm+pR2rj21+9hqi8iZEfhv31WgUIZr32RiDtFgJQRVEIpxVGOsIvdOo2DBVahxvnzkXShL42rai+0nGw
9MNE+pM31w7aQzM8WbON27F2+aHgJ9873zTrnre+endIfT8dpaNxTiKoHnWapvtuWi3NRRxQ+WAethd9Ne1RZ4NJrAOn7uKqYkra3dHHLN1pPXlxeJTxRgZmN/A//vcfN75yuHpO7kb5J2FFJfm6cRwgKzxNwj/E6eGiaLWh6SvxFmPllbgBo2xBcQ9v0Wj3s/CAx8i8aFxO+aSfZcS9XycrL4OMyOUFLLDGF/CfRduI0BMlr4c90twW8d5fQsYPvY1vvuq4dxZNNmL3ZTOxnmYTGqfBQwIs+lqMmMYyw+cvEs7fXMNV/WiMlBLqJbTZ+b/SrFlF9HCkfR3Qii/O01PxiIStU+d5Kq1tiWdGoKKY/nLCEXYWS8xVKkkUdcOORdwxl/ycyk/vhAW0Ft+HZmVUVXS9CuUoktxHyREqxitryfxvwdmthU26z3kmtROTD7KC684NuWY+7/TT73+a2j0XsxXkDViSvHtZNn/4MIDnyHxlEXfHsDlA5hdipmhoY5nW8jC3bzn5QemjJ24sujAcn7w4luw7AtTnTQT4iCZJtJnbpjDqXtpqdo5q+yZ0OrYyU+usNUBk+M8f7JQLOi2lhDdlqVjfcJEdU5EUxE9CLbHPT3miKlIHxIGUF2M23KgTJb+c2znDXdXtpwrTHSyzgkSMe57bjlZdmmxxRC/n6h0F5ktQAOkfhNUv0Jy/Wm85DwizSKuQ0naH+674bsrhlny/B+TvZQSlT5CI+1HrZcQ3sBIbQtUh5CfWUccX06jDhqBsJVG9hGGXnFw2kLgL6w4SCL/9+TNp1Gs4sxQVAxXhe+rBMuQIrB8qoMGwAUTFBEZcer5pJ6qNNo5oHvSALPeczycZdK24vuslZvJ/Z+q79kEn7diECfHJZ4+vdUqmrpfEcxX57p06zeRAOJfERu7B0r76uXGcM+YGMRlPOuzLBuUwKVo6UqX8Pj1679bb94/pzqHs6F5ch/5N0yOx5yu/5lspDPRM/m4TmOeaozZn2+bdjgXKnYzHCYK1yC6ODdLZUOkPEpmr8eya8hSRaPXMPiy5SR+4LTjIrdhU45JNirPL6mx8MBfo+k7CKXX5GdkawjxAi5ccZyxxsWk9aW4QVwe4eTI3zH0qoP58dPQMA3j7BzmM9lDfJYe4yRJ7NprP/Gwp/V3hKh86cyKtqu51zJPv9DosSPAYO5JnkRnRw/73KEps+aUztx/O5NKinbTNzXl+5QPcbOo8ERUq2iSJIz3P8n5Nf3DO3176kOXKLPstxOSJNEvPzHQW66Fi9ysb9zmSG6gcLNhj/QDgeN7Ad5wVf6oVquMAMe2b0/23XbbliePHv3eFqE80hw3/y5oSzoO3U7EeJhFqyrU7BaBa55ra15a85Mk01/D6embpRNz/LgZmanl3uDmhsljnQpzrJWMMxq/CRUgMpxvsqh+jO/V/wcS1fAsJu5dRnbychLZf0rypqDDGlOJ5PNwdOMQS57bQ6nnNaR1cPqwrJ8fSMw8/Rncy+ApwgjoPujAbDuez0RMVLHbvdhNJjQeG3l2TOjrX//9pyuVe/+NWe0t7lZkjDTvvxZt4sFcbU9w2f7El39vhJvfNJinNLbR1ZG+uUXrwW6Xb6dWLE+SRLfsWhsNHj0yuH7Dp1bLtvCaRwivuA4WQBY/4jricOhasn/m2vt2fPnL6QFg+HSlnaEh9KuP9i+9Juu5YSty5XUbfCnmPLJN9nuWfSPL0scrleRwXhkp77dS2bQiwy/11FJVVVOxrdsye+3rP7Xz9a998UheZm7higy9/LrruQp0BdssAj3yCPbPlcq926vV3j1JktRnS2vISmURHURzb7XguIuJBpzs4Ne/dmRPMXPtqvN43xddtDtNkuRYs33ZZZt7zz+/foUZ860qputVATz69KEXLxh8ZvDobhsbmz9fe3rWbt2u16x3+XnB5rNBRrZW/cA1lU8+GNGzE5ITM9kyK5UkeuihRQPr19+76pFtevl118urcJaSe
2VrW6scuZb0Wat86tFqNT5QqeT9VSr3l2H0cjMbaNJnKqbmCvcc2779vY91GqvOwou3bpPl11TMqIKuV0313oOPVe/aOXX/+8uZ1i6Rbb6Y9cWEVc2iikZZ+OTer3/t93af+so0X/fMnQ3yvj2X4H4NaUMRMdz/jtsvqrP52R2E6ABuq0nTAcRfxyef+wrHV00fjnMmj7Fbffx/kTpRGOWkKm5Riy+IgkzJUJstpqYaTpYUJ4f7nAWq1buOAPedar9WDF2HHzvSdy6NkNImQU50FiVJol/9av+yhfHRm116flHcLgcGkOZNEEAEcVdcUonCgbLKX1+74dN/Ua0e250kSZ0OaB9RALFQvmBwwVvUone523rRkN/iWkjiwm9GpWg7LL4HfusrkEuYW7dlG5Tojzx4DUHVzUTiUW003l+tLvxLM26UEL1PsHUQehGseY754pPRPhi9p1rt2wIc60DqjBhfkUhcPU9HXXbttYMXv+51Q8/kNHZUVydsmzcvW+we/YEIl6q4oYCLikd/0//9F38XLlhe6gn/HuRmcVla1CzNRxZXNfl3HvE3kl2wqVJJdnZikle94Y8HsrGxDaUe/SWMG9xYIKoTGEkeiqcaiR5w2Oos+KvLLttchXqvubwHid6q5PSpuEnQ2C3aWakkV7WPmSSJfvUbFwyW0ujDbtnNiqSIqASNStjDwE3ttFUqj0Rp2LU8ePRRd7+6SZO6mmsoq/EeYBYMsg1z5cVWuYFSOSIdM5BDYE8CUPf9SGMvImuwFOLyJdjoCrj7mbkZeCMs291PI1pNVoTqiB7ETx6j96U6dv4xJKQgkGXzwS7jwgMPkST1001TnL4e5GScczvfRJyWLekcO2m8k/yfJFqtXrA6RPGnIPrP4De4eb+54Vkzxq+BZ3XcU8AjsJUov68S3Zux4M1ffGpJOZfiOp9MMeWxpPZOJXwUZL27q2f1vN+sgWcNwMuOvxENH69U7nvNuBqdaU01KEgZJ0aIVUOs7ksz+A2Nev4Q/Grce90LWpv9muFuKyF8xCj/1k03fXL+bOIR43qtbm7H3a3wSkPLbCD9ov7Rr1YHr9iya+2kJYc7I4rE0JCiGmHEOLEEjZQwX+q22qV0r4j+O5ylbpm25iWPrQTvF5O3u0QfzbKB1ZP7r1TuXRzX7UMq0cfBf9VhgWOYNcav43if7ubmy8F/TSW+5/zz7feGFv70sKg+JSKG5/RhRSygyKpG44LBibdNYpr5MlFdKSqtawORO5dWKpsXTKRvm6mzGMIyEYnHx4AyeE1cpkioM6KIvT4rJIly/3f6gdcXy6AoIjtI64dJXHnx+SHcniCKR4EU95WIrJ05x7oN0wljSaLjtsK0VKHUs5YsNZAU9ypmx3j+sjruu4ii44hAWu8lKr2Z2tjVrL0tym2ns4+rzXecHObzI8aPX9zb1HmpVC9YnRE2icrNbul890wR0yYrLbJFtJ25upu6W+yZXy4e/vC8kcbNUyWacS++uhuOrBb0P7r7cstSLVxammcESB5bKK7uZu7Zmgzf+NBDixbkc+i1PI7eQUxx1KwRu8htKuH95o1lZinuZjjmbX2Cq3umjs8XLb3rByd1PcwmaPv7I0L2zyI6MjHeFXAzRG6MNHzugqGhjZXKp9aQd2rkJocpfTcaYybjBUscxNUtU7N0tbr/IcgVbhYVvNha8yKKgONq1oiRaL2WSu+f2HuirtHHReTd7tni/HwzBVcBXFAR1bbzUMSa46+QEH9w4dDQ73iWPSOqRxAMseJ6ZIjo/FJJV7aGK87RwnJ3W+qeX5e2/QfNGmsLm2lrPlJdhtsCt2J/DNEA5nvghT0zX49JmCsnTb1+MaXyGiw1oEaWfoOFHM+LSVyfYjwOHMctIksHiEpXMbCvb+blpAtMJ4s1+cLi564h6vkAWTqAqqL6NHbyAY4+MAoYFu3A/BmcCDMQ1hJKH+NY/MbChpnHSs6Clok7zCgl/ngwz444x8JtK+snI
0kSrVQ2rXDCx1R0vecXILeL5a/nVELphIjsNfc9IcRDImEiE/RMRWWxEG2+9nX3XXLyZKaTw2HGz0noBe/L/1VUo1SQnKG17SqCmmdpFHpeE+L0LUmSqKnXJ3QoqHtWBrnULFuGmZL3aaKKeMs+JCKIiLplkWe2LEjpjmp14eBkp087kiSxSgUT9+2CPi46yd6UF0lWz7I1IcT/u0v0j9dtuO/Prq3c9+bXfnXJsi1b1kaTmWSppOZNHWe80ImD+EoRvcIsNQRVVUSDFT/bhIQrcfWsHrn7r61ff+/VkOhll23uXV8Z/AOV8KtZNtYLFo2fN2IaolGVsB9nt4TosGioC0W/goJFWVbrDaXeD6Csc2cvIupe3C3uphppBs0QGBLy1Etcf8GzbAGeL4ZXVLMy1aAeqOQ25MSqVbRaXdiL+s+6Zf15VpxAca+4yN9Xq0n6Q800ShKF65RM14MMgqRE8X5UHmf32nSciVn9ScZGnyaKQQKIVuixaSs2FCgW4ZMyJZayaPEyNn1rBfftXcnmZ9fw2b03sOQ7mwjRf8fSy9EIgj6O1d/LnWt35IxPjLtW7SPLPkb5vL2okku5cimBv+Wz+/8rn917Awt3D0JVT8UoO8dBdsT0XChx1yLwfE6QnKtyTKeBiT5yz62CrrlDRl+8WQjXFA/nuKoooiaqO71R36QavknGaCb1derhXaJhvVsWk8cwqVlmqqV+Se0DIZTeZ3gqjk728I8nZmrY75buMOe4qi4vJKeBPPOkuZdHZo35SrjuoccW/XUkmRVse1IuRe52EpW6oI+aNQ4gUtYQXeKWXTJZzc+7tyvAlkFy5NRe4Rf3Zb7gc0HjNe4sds90vB6ooI5hWcMQ6ROJ3i6kb45i/+bCRcf/qlod+AJwqOmpbzTESrGk3kZ38yxwN5HIVGSve7bTzU5I0NWIrMOy/lawQ26nVonVqN8CyWPnnffpimjp7WluP8sZjjuCGnAo8+xz5tnfSxSOq9sKcf6tiLzV3fpaHmGP0sbYAkF/CU+HNET1jCxu7w+4qDlfCfDahs0v9ZTWuhvuaZt06nlMs8vP33LL5t4vfvH5WrWKXX2j9pbSsAo3xX2cRvdsGPWvz3wXT4OzYqcb4WX7FuPhKtJ6nKuxjd00xiZ6qe+6aIRNzz6I6M1kYyC6CgmXksie6SvxCGCgcjla2gyhmTgQgffhtpigfWQpwGG88RUyPs6RVROl6MSVIzzEon0fpjzvD2iMrSgkXSPSd5Lpmyj1PsqSpV9G9lQ5fGR/EfIwTbmzM1GxN26EJOETu04ul2dH3+S/IhHuhoQzn37PDAKf+NWxR39/Tc/TZ9zPHKAV4tPGpAQbPHpk0CX+JfD5tN9qriYiJ9wb/3HDhmOPNjfv2rX20JEXXzyo5veAXOHuxUPratYwDfE1sTQuMbfc09tWetidIutEdpqnH80auj2ObbQRxgaiLHqnavR+t6y/RbXg5mgUrQhZulhdzCfFIgKIYwh1N/usRX5P5DIE9ahhsiYS+SOQi/OiGQV7dVPQxYJeDDyZJFPDh5oowmSoVuVLnjUGRMNHRaI+LyQ9mhlJuRqf21CFPjeviMrlaPn69Rs+/alq9dhjlQo0GuDixaJtE9ITTTQC829CfaNQ3yk6r4bbYkPuFA3vxrK+1jUS3DMQW1epbF7gkv0i7oMTcyDERMOwe/qpejn77BNfPj5S/HCgUhnYax56VUu3uzVyVb4ZDKa6yiwbVbeaIHFz3twzcF9dqfzU/GolGSZJrFTZNGDua5quxXH2KCi5mr36e99rLAP2QWKa3dcHvpKiDB5Cs97CHjLfe0axn2cjfiRibPrWKuKe1aR1I4pr1Eef4OjQMZKLWiXDAHTvw2SNEZBeNJSx7A3A508dD6n9aLSu+D9/EIpsXxr1lHweTiD+jwhD42M2+22mG76w6i9Z8u06qncRxVcDZRpjIKEfsVuReAORfpNFS/8W+/W/hOTI5MIas3fStIjPa
SharqzE5f0CH0T0g4h/UNo+p9NG9QOi9gF3W3c6FJ17FGxSvJYSLnbzy3MnRpukpaqI/7Xasceq1evG4yIvumh3uviCC3YiPCAhGqG4PXMV1k1hIHO7HogmhDMB4KYhOu6SbQr0fimOXzherRwd/cbDJw6JN+7DssdEI9zb46QwdwZClg20r/Mz3qNDblPXrZbJPVE2dLBaPToK3x95fWXom5h/yt1TL9TUNptqZMgrZjNbuap9dHRkJPoTJ/tdYK+GWIubfeI5NhklmbpZn3t2q0rPPSkL3ghAb/uuzZNonoupB7sbjldh5ESlcnQUjh5Q5L+CPENbFXvH86ElLDUdW6caX+JmOm4eaaq41tiRxvqnN13ZZI5JEat5/DCBexxLc2bbJMrVzfpBBtzTWq5mA1DYFcNSiBZX8pU71Sxbi2XL3QxcwN3cyRMn3Ey1NKAlXdOkO8p8qbstd2tZs91NPfUdUDsx1ck3C5ypCJO4cv93yki4nLS+vAinOU4WHodKEaeZaDOPmedX78PZQVTKGZzZhsK5MzM8HSUdO0ha309aP0BaP0jWOIGIUe6NCAFCWM28+R/B5HMsfnbdxFqStOIan/+fX6KR3oll7ydLdxL1KFFJMQNPe0nTDcTzPkKJTWzad3F+bMtkMdFJMytPdfHMFXMgSorIqED+cUZo+0xoU7RpfSb9PuowKh3X3v7hYrKKXbzv64peJyrz80IWkjNJF3PLhh17II+N22btQc4PPLA7bbhvxX1IhOYDhLtoljV6Bb8cvJ/2cnCOiahmWX3Ig26tVr9br1aTwsaTWLX6vhMmfFk1dApk70uRPjWxKdIjmCg1cftiFA0drFQo+kvSJEksy6wqovtVWyFN7m6ImogOMkskSWK33PJ8bfsjd/1pGuQNZul/EtHdGnpG8WAgaev9InnxCnE1y2K37OJI40/Bomva+2wG0DuF9CiyY/vWux6qVpO0SX+lgp1/vu53T3eIaJ2mKNw80r2XNLrW8pTGCVCNMOVvH3voPUNF8HdxbP7/9q13PYbzpIQSTAjeFVWVsjsHRQPgzegzk1CanyKrxvcN4ToJIXYc1Qjwb6roweZS9OY+X+DSSmWccV+C+4LcOQOCpqLhmEn29Wrl+8OTVwSdHs2XPGcnQY6MDRDF16MaUeqBsZM7iE7sbDk/ig9AIinIA2SZkaVQ6lnOWHrD9J27FXRuh3Ataf3nSMd+lpPRzxHkZ2nUr4lUAr8AACAASURBVOXkS/8HIjuAlNEf9FMq3Uyp9//js/tvnVJkNxEjuT5l6JUHOLzyM8ThtaT1X6Y+9nlK8UE0GGZG/eR8gt5KpA+y6G2Xw8ZxJjnNu8QnqduT2y2IuYGnhtfBUnJ5tPPH2769rQ0pWNGWVPxUl3ASPefAf9SxSyNCfDWiJmBN+5yoIqqHTfwAdPbC+1jPQbf0cBFnaOMrO4orooOO9I+rn+MQBEZcs1pnlVYONetHTiyI45GgEaRtFq6m1wIDHcnwY3n17ok9RlGoC+SFSGWCGwiE0yrc25yHbzx858Ht1aGN4v4rno19VFQeEo0Oi2hK4RgaL3snglmmDstd+DCjcVSYGZjw2hJBjCPFSBPu48sue76myAtISPPzLc5B8nMQZRVu88enq/g2S8F9GtNOPoaITPrdEcFAyiqyF3dEirAmwRR6BVlRrWJr1xLltlyMgkE6uh2V/VLEznrWKLv5RbCkH8Al/KxoZDhWOHNURA+QsTe/dKeTauhn96wkYvREK/BsXe5gQlGG8f71fGbPGyd8Fu99I5959k14I8ZtBFFDxBC/iS27TnEfSUqqdY6uHeWui0Z438tP8K5XHuLoXzzO0OGP4GPvIEv/BNE6acOwdDUiG1my7JKOITxNafKOl9c48ud/g/a9i3r9DtLGnxLFJ9AI6jXQsJhS+WMs3bOqGZI0UcX2JuMZt8xPbY+jzSvj1BCpC1ITpCZyZh+EGlBDfHoJshN959SLPSFPPHZnc
OJdVgwucjzKQsfAb0isp+fQMHBMVWkvC+wO4tILEkNhMyzGbf2djjKvNfdoUz+104RMYbyGTX64kiTRRqTmkp9H03c/V2+gavWF3SLH/ou4v8fTsd8F+WNURmj6porxRFDPUhC9JoR0DWitKfw0YwUACFNfpM30wsyzurTJSs1XiLur4QvcPPY2ppFL9lkaEXUMiG97kRwZZw5FzwV6Ef8ndxsZZ+aOmmW94K+47JYl5YGBwWU4a1pFkQ1RnkD0ADC+sJ1GpeVZyJYmSaK4r83PurjOKlia7g2hdPA0pr5F55nGQTbVV/cKyCCWKY0xQ/RWouiPCD2fm/iJ/yj/lN6PWx9uSqMGGl/B96KVM4fYOJTHtPOyC9uMw2v2kcUfAdtCFEd5LCSXIvqOZsjYVPrb7J53Lh3lhVXbKcfvx+obCeEQGnImKXI5pu/gwgMxietEFRumMsJTqN2ipDmDo+ZCzdXqLlZ3L75ltm3qAjXwus2kBHSi7xxGII0/jrnEGkkeqNuyXTVvXJd6o6EdCysAVKuYIB0YqBgaVCZyiVlh5uq92Sn3mA06BsmfEZqmgSStVF44uGHDi19qjI1+yN3vEuFA4T0eH89xVKLY1K91UqWI5/TCwTPZMz89/cW3FDpsXso8br2AJrhL0jRk07zkmpCxcRW6SamBO+UU9uCyVzQycTcH3LNYkRXn/yCdLxGXiJb6MENENEsbdXWextLv5jZJDMHcWCoNX/zEE6v6EFbiha3U3VTDCGL/dGYLuZ3FszLOYPQNSGFL1qBEpQFgGSJLO390MSGKgNzuV4oW4375zI4agU5l9NvV96MrhsjsHiwbHY+Qc7uVe3f1zZgt01L/jRUHRvDz/gRr3IOEEUQhrZcpla9mNFsGc/AEpSmIWj2gGJh625uh+aKcZdudVHBcT9MGOUfPcLWKVSpphER9orlHeFzykkLddclVhZz28ZqGDr2lkk3jUUy0Urkwdk72NVlqy/nh6m41F6nLhBqJZ4hxlTLMvN8s0KJzbkX05hxVKsnw0MJlWwaODcVBo4+5Wb9IW9FVHHHWgMduTRUcaIsBPRXG59llvOakC3VEwFrsMZckJY4yZszbdbfzRbStXsr4CGnJ5TBBtnor9lFxjBAPYukCsNeqKJm4iUQK2d5K5ej+rdsu2Ccan3DL+t1dRWxQRFaMjIwckuCL3VtXwtyPoZxe9kzz/Jrc8UxtkPfuvRT8NWSN3K5kthfP9mAetdJrOw3tA2i4FKxMo94P0ev4+D99ie+fGMkXy/r26dHRYq5P80f7dhNK64qCFSuQsJIkyVMaT/UCuf76lOQRWPgzX6As/waXDQgpqsvRxjIS2TdRxT6ddMKNG4tDPBWRmkNNoO5IzZGaS/E5jTbqNReti4fTu4RzJEHmapSWaa7SKC0lU3Nj4xFROdQ+Ty0Hji2uYx09dEkCjdLIgIsvNjOgXfoUHDuheYXjlq3wNJhS59PPOM3whNPs/9Q4VQBztZqkg0d3W+S6WzU6RFtgeZ6P7gAxPiGb5bTombCvkJfTcx8SpD6+zEfBdTVEajbVeVOcSxF9wEpErKm+53lNggjHwWrm2T+4pXVENF9SRUxF+qGxGPe1ZllhRwSQJ5MkMXU9KKJDCCaCOl520VeGYKtVS3mWkGOiQS2r71Orn17udfPkzxYRNxKXI/KMpRouG3n+lb+Enn8bPaXpP0HuIpSeyV9KppTii+ntWwnbjLMNoHbJFwVzz71sQeaf4ohJqBiMHaFeP4Bqmj/O3otob37Krb9nhsjNTWuKmEEuR07Rfjrxu6nPjpF7XSU79xLkxLp/UKmgSZKk69dvWolk42EW446/nA8edOGo5OEhxc+Cu6mIDqpwCbBzciB1ksD6DaxRiRabp4wvN5BXuUnF0n2GRHqGrOicmmDPoP9OZdSa8zxRwk40l9qzMnh5siMwd1n5CYR+0dzHebr0tDQANHegaOruB1TCCcda0qKTB4wrVyVJ8qVOm
kClcm+fua+T9vvZx42jB8BHXMMeNfYDa8wzlTy4e74RLhVhZV60Q3C31Mi+AZAGORwsPYSzGjBRAdFV7vYDFaWotI5IhEj69Wr1fSfOrIiwnNnNkiTKsn/fT+Pk68kaoAFE9yAndwDw/JJa5wML5jfwjv301J9Gw7p8jRlbidvFcN0cxDrnWWb5v2ago62c71nWg4t+2vAf1HKeZNY+SR1Y48RMjqntAm2MXyH1fGU6y4qU2BwtBaa1TSe1WxARyzNWbAYJshN9p4/JD0ClklCpJLr1Eb9LVPvNsjw+zwsmaKkiPEua7XMNI7j0uuQ5u7ntSGNxfxvwp8UImveLwoVRaiOvV2WBu1vTGC+CqZaGU8+eELefZ8JbY/bnNc0V4mwtKGf2LCVarS5a7mK3O/5MpXL/1mr1jmm88HDllQN9mcstkqYrEJ9EsIDotwS5zJuhQPlmbb+zZsbE2VEJqWm6C5FDIEvHexHUrAGU3vjwwwvur1SS/fnSxq2eTLhRJVpheXC7FhRansrOznovwyHzuro+jdvaptfZ3frEea2jA4ghqoAcDsiTAFHmQ+bZXtFSxTyFzFXUVpl5LJKNu/TMGmTIGdZXPxsv9kZo7LuEnvJqxk6ChgjsSYLlDq0Z6ywmyvFVIyx69h+Ie9/C2EvzcesnlK/ip1Z8gUsPjHB62eQth9GSvQO4ryJLc6btNkw9O3L65/eDXlwGsbQo2yajICMwOdVwfIXA5k0jrfY0T4umpRTSmqOWhzugrcfcaQmUxcbJAmZ72y0X1CSawYvdib7ZY+3aJB4cXHS1iS/1NN3nrieiKMRbt/pKUb9DVG81y3TcvuS5ucXhYObp0yX1Iy6lRxG/Ec8lcgTFUtMQ3bi+cu//1hjr+X96eg4VMWoLyyYnbw3S83bL0phchcpVJtHIspMHAjxs8PNeLHrkM7C8TpjgZsgdSLTbICevHHk6aB07OyRJYus33Ls60vPuzGxsmVntmfWVz2zH7B9V2Z8GhqJMLAvSGzJfaeLvwv1N7lY4UYq5QcnS2qiKPezwC+30nO55tJ+/4+oi+ywd+6ZoWGd56FbO7NxNlLUhkg/Coru3bHnhcJKQVqsXxnnNR/+ISRp5U5b1XMbVEO03sr+76crjI7t2ra0NHRv6Bwi34pTzQPJ0PrABsd7WlZKdwJE8E+aukfXXf/op1WjY0rQ/L4jhqwVZbtbIox60hFu2uyRHnzytk++E5vM203KsTSSee5Nl6XqcBagaGp2g0djG80PD8MDMYyWJkWxULNpO/eRhRPoRNczWMy9dyrZte1j0zkkHzeKhXvJ8GdffptSzgEbNiGIwHuPFVUdy73el5c2eaclZqkr2skvp6bmYRj1Pa/TsAMYhEtepSy6cUT1IrUsza2Py8ZM16RnahhgK0YTg3kk4i3qQuXTzU72m4VfE7TcJ0Ql1GTUhQhlAQtkss0lDGGAisr3k8QGIR8xH/0IlrMN1QdOp4DmTBJcPx3Hj1akt3HbttYxmLlep6O2epUvBtWlbaxaeyCz9XP1kOtRT1gjBcLS9HuRsMZVlZMW8hDNijNB8lGdPS5IkumULkWSsymx00N0jCdGlAusMUhOGg8mwo6mYlc19UDXEmRW1KNqcHqKKW/b5RoPDUezllg9b8NNw0sCkF4N7/gIJ/ldCuFHUV7lleYiNoG5ZJITbHR+8YHDwi1+r+rGgtVWWydtEdY2bjWsADiaqdcuyh+aVSzvzEKPd6QvbFz0j6BHwFYVwoUBuG3Mxx8zddo6OlIab8/a17faMWXZCkCKHXGKYGHcqKtXqI8k06uypZ2EqNkIyUzTARqCqLBlcisZXktbLedSF7CewO2dC15/aX5CIkTxygMVLHyOetzZP99OVqFxBkuxm0+3ka08V8OKZvo4iYHsjucpaqM6Lvr0Az94KelcRagRuJzC7H6rK4LLL0W/3k922k7suOjI1pKjoKxHj3r2XEOR3SRurwYxo3ijpS9tYYIcY6
iRBTodpHDgaxtLM4xqSV0M5mzx4AcMhUzk9G+RpPC31uBzHKQs89zAOoDIghSrtZHnwdrPb3GZlInoos/pfBV48AZDFi/5eG/yChNJveFYvN1W+/CR8vov8RkDfCpK6WX9epqrlnRUXE1V1S78QGPt8Z4/zGbpG5Ix9lB26On0MDv5Ur6Gvxr0XUMtSy/3FROLaj0o/4uNOmMzSybdWKqqK2ZMe/F5ixnn9mUnAHc6jAcdeHHx84cKhTaLh4+QRNCYi6oJC1gv6JhWtAKPu3gfEZqZ5EXsHxDSUEOdxs9q9Dz74nuMA1eojkbL7oIscQFg5ZXwRUwnHzPyfb7nl+RrkNuqr3pDuK9X0gGi0sjBUNZlwbj7FasC2fP8zWXvHARRLI5yL2LT3ZngO/Fe1df81K+Y3289C9DLDWIPIxUVoD2SN3YTy1NUBZ0Jyfcpn9j6IZe/GHUKIsfQm4E8mO+EQYsT72D04zIW/njK6OyJ6Wxn2LiCTdZTC67HoTbgtAIworuPp54nqW7lwRR+mb0PCrdT9m2za8yD+rd2kpUMMMMxL56WE28qk+xZz395LifRdIFdjmVEqK86TpKUt7H5FSlIwtdmZqjo/sHWLLcJriMbkthhMMHVTkyh32bppvq1gPqKFimJKsX+zPwXIZggU74RZPjdJkthrX7u5TMziwnsMnqdw5fbrdkkjV/5D6BnNvPG5gD7ctpzB0A03fOIPGo3yAo3i2y2tNyWaXDV3U3fpQ9wQz+v3FZKPoIiqmttXAvLhavX7w5XKwl6bUUL/yUA+v5+YX4rDxS5mZm0vnPwFpLl0MEntzf/Ns0tCrJ6lzxD8w4svGHzm8IkXFnQebXbocGtYCKndfvvu9IknBv7kpZPyStHwW+T1N1NBiqfBcJMyeWFammuku+dZPSGU1PG9Da+//xtfP76nybSq1W122WVLDp/Xlz4jGq5xyyLaXroI6iIHVdnfnDOAN1yVnPhadeGOoGFDXui3FWCV2yzZL954uv2Y00I+x0paLxNKt1OK3zTrl3CWlUkb/eBQikcYe+kJDi87cdqLcIlvJ02PoNFg7qxhPZv2DY4vP49ofhvI5YSwGWSYWqNOiCKM+USlBZRKg2SNATzLmWpcTmmMfYGGf5yja0+waM9yovJrEF+KyFuJz9uAZ8fRxnFG/BiM1ElLfYQwSFxaSv1kwWR7FPchxkY/xNE1+5vnNlHgG1dX2yeu2e7MhcolTOCkZz7q4qPuPiomNXcZFfOamNda2/Lf3bzmxfb8t3w/cR91l9FsxjjITvTNHqVSvdexQciZFS4mxSdPe5O0CKlINcRDDat/eNEFA/8lL4TQujGvuebEIZEjv25p/ZOi4VirTmOzVqNT2NVM0BTHVCOTEB9yz/6vQPquavU9z7Q7AYq0RcPF2p+pjkGzraMoDMtN+ovtgbT15kvHf5dgrRTCTjjJeICqF7RIUQl4Fo9DVupRkFS1NKIarIitMRFJBTWcPG3O1fJ2HjKjoZRq6DnmWf2PLbLbtq8/+vBFF+1uuw/yfvL9i3Oc1eOpNK9JM60xyyIFuPLK4yPnzcs+hGXvFaI9QeNiPClSIL2Nkef0qqppKJ2wrLElqzdu+Ub1xR2txcEAEnvqqedruD2hWjohzb5a18c8G9sD9XEJrOn1D/A1MwMN7fsX9gd/cmysMTQ5rXLWEPL7BAHL+qifXEy9NrtPkzlqgLQxhPmjpx2ek7hy56uOoeEhQpQ7Yks9g3h6I9Rb9ImmqPQTQoWo52ZKpbcQ4lsJ0QbMLqZRGwSUuHcUZD+1l95Pze7k6CtypqZaJkQpUZybIhq1ftJ0JSJXEKI3EUpvRsONWHYJjbEBRCGeN4LZwzTGfpGjax5vJ7tDPcjJjHBm8axu5BWfFdP8T4H266gdtnVoN3OwZ7JBdqLvtKSvKBL0sKiWTaQPtzJ54QkDqSMyjPsQlu0Usb94tPrbDwM8MMkWXTwQtUrl/g+kfvKL6nabhJ5Lg
WW49UlegFVB6yI6jNgRS9OnTep/dnxo0WO33747bYZqnH9+ZN//QXZYNX7aMFQL35UEGo2TB0qlUsfsjgaMlDXeIRN0VDFERyRNR4AR1Z4draI2CrghOuI6Ntxxek6GNJSj/aj0mQYTXB1MpaSucqjt3Dvi8eoLB6+5ZvBOVasgvFajaK0QBtyZD152L7SWfC2WuiDH3bMhz+o7UR5UOfbQhmuxR5PEEhK9+sYoVQ0HBN1pmk2gJ5NakW43MaQqSUA0OhZC/DRCLG03mkjpsPjJ0eYSq0mSjFSrfLbuCx8LJreFKGxwD0vzXG0rjpVUJIwAx9zGnvEs+++qjYe2P/q+E52X+YVqlR0i4fEQlZY1tzuYalxv1EYeqX69FarTCpy/d6e7PR6intjVinPNXyBpdvJrPT3DwzOVmpsWlg0T9T4DVj4jI5ijBUNTRr/3GPN69p7u2i7jCPwVIaxFepSe82Cs9mpMHqdU3oPQh3kZiPHm85NnF0GooTJKo3GcNN2PNZ5ArMp7Xr13Qmrh86v3snTPHWR6IyLXEc9bBT6AWR9mEZiimiLRKBKOU39pH7XRv0PCF3jPq4YmO67yJ+uze2+g1LuZdGw5WTadwp3r6I3aX/Kq//W2ZFvFkkTs4986uQLxN6vPQV5b4eixzKvvW3teHmN1775V9ER/i9uaYvW0Dge6EfVAlj3N83922UwXr1K5v5yFk6s9s+UqMmDIAnWPwVLxMOyeHVHVg8C+SuXo6GzVmZtu+uT8kZFohUS+SmCxYX3iquJ+3NWPqLf6hElMJkn0tV/tX1YqlQbaOWFQVxdGouzY/k6LTV150yfnxyO6KgstVScGsiAWsrGDJ08Gi+Ppf69W33dicp+33bYlfv740Apx+jJrHRfU1cZKx77xjTtPmQPcZBqVyr19WQjLQ9YYNNEBy7yfQF4d3RkVYVjdh0APQe+havWOGsWSuW3ZNhEsXJGpz59MTzAZrlbv2teJhqtv3DQY123p1DeLpmPn6/6nvnjnuFzelOB27VobHTl+fJVYusKdpYL3g0YOI2I+BHJo3ryePQ8++JvHTzUHt922JT569IWVmUpvO90A3jN28B8e/A8d+kj06spPrw1ZiJvX7FTXa1b4410D1MMymqnFTWGoUXzP1G7/PxJljCF+75WHzogOgHt39SHzVhIKPpPKML3hEA1bTqO+gCjqwzxGPcI9ArW8iogWoTc+hDeGOLo2v36d1PymY2fZoX7Sl1biuhjxAdA+3CPUR3E5TqZH0Jf28Z6fG5qO3JzbbNqzgZ6+zaS1FTmX7Yj8DdKo/w090duS766oJ4nYJ58bXeaZ3+yEGMfOyktjBqpIJtX3ru3J04U2P7sGjf8WfNW0DNLdKPWAZzt41yt+YeoOE9G+/nG+ZOtLOjT0Xbv9dtL2dZFP19bTYgxJBBcW8/jdZimufK3safucSXWa/phKBW0vedUsk9XcNt3veYzf6fU78zEdeimqgrevTz15/NYa3zP1e/r05BELE49p+3WasI8Wc06SRHftIjp69EJtv4ZF37Ocg6nX9NTzOPGY2V2vU5Exi3VgZoWqwjY7Y+lxCj3NcJxpajlOe9wM+0zYv2CUrf4Vqkwc8+4ZUxJzbrP52Wso9W6mMbYan4FBaqRY+ijiv8Tzq4+TiG1+1hec9Nobxa0X1bP0oBpmmhJk+/f//P88kCSJsenZKwjRF4EFZOn0EmRpHmTpdt698vrZj9fK8ICm6jIXC4ZN7vfHbRGyHxXaM2pgbub63GFittWPN61dzAKniovsACFxZelzl1Cat5n62OXj3qGOfhkB1b1kY7/MC6/eTSJ27y7vS8NL17iEQU5Zx/HUUPfR1OZVhx/gRJKIsXnv2xG9H/N4gkNmAn1uxL2QNv6ad6+8bVYBsF100UUXp0CzWMUwaTact8fTuXJMKExrRqmnHymtgbtJ3PXoEDVTjoh7TfC647Uz/Yh4aipDw0O0ORDCL6AhHndZji9X1
0afA5aBUtjHZrn+bhdddNHFDMgZZNw4QTZ2pChZNFHymqzSZul84Cou/PU4AZLrJY0bHBHXE47XBK1LpnWh7XPKttcFr5tRH3Pbz7a7cxru/04ZYUPhYe6cqSPFtiyFzJ6d+ynqoosu/rUiZ5CH1p7A2UUUj+YS2jRhMyJKlsbEPeupp2uboVBHh847JioH1b2mntZUqam3fU7ZDjXB63h04OSreo/AxrwOx8n6G9FwMWld8WncP05RXUSOIeSOnblcg7aLLrr4V4vWUonC0+CdY+Pa4Q5ZuhbRm1m4u5ck0eR6SV+M4wOWlo5khLq518y9ZqH4tP/f3m7bniHHYi/tTUQsgTzfslS6sxhzyuJTEyGgYTcuh7r2xy666GKu0JLKgj5NOnaIEGkH70wbXHEvA/8WDVfkbnTX5OVSmzcW71NPjyleV3wio/S2Txtz1NTrkqbH5WR939G1jJK4suSpMpK9EwmvIa3TvnznFIgYuGHZDsbsBFw3RyENXXTRxb92FG5vMf7XoSNktpWoB5gpk4XcIQIr///27ifEruoO4Pj3d869972ZvsQYnTCRYEIYUpmFRBoGXdVAd13ZVpe1QWiKWVYLUkrvUIrYLooUq6YuFARtCy5aKaWbDLRKrS66KLY0dkwlZpKZMB3j+ObNfef+jov73sub/2/GSSPl94FhOMx973Bn8eOce3/n98P5H7L/vapgZR7d6RPS/O++xrRGuaROm1LGIJIUErQQ6fsJWlR/06IUuVxvNqY/Or7vWt7dGWvjXlz2CGW7AVvkcImAS66i5RvMjy2Sn7zpLWONMf8fVi4Vf/HPu3H+LYQM7ZSFiquu7tWHFCWtKaF4lVA8ztzs1W4CZh6jOzhDPSx/spdm0mg5XHSFYxnqaaaFoknQlk+GFubGaeYiSn4ugfuVQ++fILpniXo3ZTtZVeVj1ePRCN4r4v9AaJ3hyl0fbPsAvTHGbGDtXvr5f7+C9w91muC4zXfbUcnqBWX7t8TiKW6Nf+fd8dAfpPJzMeEIyUhzLoER5marPtj5SQnXM+MnYeTBYZyfIKs/g8a7KNsbTLpq/trwAq3mE8wee2GrrHhjjNmO6+Gv+3Lj7L++giQvEXWUUjcPkFW2tuLTgJbvoPpL2vIa82OLOZOdjhAb5CT2H/85cP5OvDyE84+AHKVsb/0cMaIkCSBTEB7mw7FLtno0xuymleEvzx2HH95LO/wY5Nuods4vbkkRgbQ2S2vpjzh+Ra35JqfuWVj3HGg3kD3z/ii++Bo++zqRE8Sy0TvJM8iczjtUH+Ty2GsrvtcYY3bB2kiUR8fBfxwn3fNzQjGBbljdp09nJQmQZAqySFieBvkLTt6mHS+RyiKxdJRxP94fBb5EZILa0CHay/XqxU/cOjjG7vPPuqLlr/mweQpWbuuNMWY3rB8gc1GeO/8NstrPCMVoFSQHLNsdY7Wa9KnDewgBNFR9dKvVaB2fgnMQ2lAG3TSNZ+0EikuA+FdieYqZV3Zem84YYzax/vY3jw75wu9pffIsiEOcDlyUVsQRoyMUyvKSom065wHrIBkxQnsZlpd08ODYPd0TOw165AKqP2UmTG/jXo0xZls2Xhbm0XHLhb0Mhadx8k1Uldh5ntjrM9qp5r3huG+K6+lBdBqUDPD5vjFU5eLTbJ6y/AHt1svMjTdta22MuVE2Xr3lonx05Bqe76O8iEsCzmkv6PWauMsm41U5jL1CE4N+vvsVUq0c01qL0H6C1L3I3G8sOBpjbqitHyzm0THy7gF88jhJ7Vto2IeuetPcW+XJjRgr3iuRi8T4JKfHzu74bo0xZhu2fv6XizI3PovwJGUxSZJdxGdVWbQYtfNWmV7zrN0aRxSRquct7k20/C4Mv3xD/xvGGNNnsLfHuSgzx+bJ0rOE9hkiUyRZwCeuU0OyIn1b452Pq+CbZHRSh14gLJ1hf/t1Zg62dnSXxhizA37gK6cmI/fcqnz8wHka8+dQvQJ6lNrQHlQFY
lldGGVNy4beKrFroz7bUqXwJGmLMryDxu8RWs8xO36JuRG1Z47GmP+lwQMkwNRU5H4RFh+4xmO3vcFXH/0dZXsJn9ZIa/Wqx7QH5yIinf1ylPWDo4A4xbkqenrfojZ0haL1JzT8BIk/4jvH3mbiQCA/qUxNbqf5tTHGfGYDZn+vo9eshxRnXwAAALtJREFU+8uOO0aPojIBch/p8HGkPEQobyfGYbzXNdNEdagqIk18chHVC4Tib0TewvNnTn/xam8OSwI3xtwkOw+QcD2Adc9b73+vQcYhXLyDUu9E/GHSZBTxDaJmAGhs4uICoZyB+AGlTEOcxV+7zMzrrV4fW2OMuck+W4Bcrb8Rd34u4fCRhI9Dxp7EsdC5xgfFF8rwcOA/RwK5hF4tSAuMxpjPkd0NkP16W3BYWfJssjPu/LagaIz5nPoUBSp4D1AF9yMAAAAASUVORK5CYII=)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/custom_column_csv.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**LangTest** is an open-source python library designed to help developers deliver safe and effective Natural Language Processing (NLP) models. Whether you are using **John Snow Labs, Hugging Face, Spacy** models or **OpenAI, Cohere, AI21, Hugging Face Inference API and Azure-OpenAI** based LLMs, it has got you covered. You can test any Named Entity Recognition (NER), Text Classification model using the library. We also support testing LLMS for Question-Answering and Summarization tasks on benchmark datasets. The library supports 50+ out of the box tests. These tests fall into robustness, accuracy, bias, representation, toxicity and fairness test categories.\n", + "\n", + "Metrics are calculated by comparing the model's extractions in the original list of sentences against the extractions carried out in the noisy list of sentences. The original annotated labels are not used at any point, we are simply comparing the model against itself in a 2 settings." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started with LangTest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vZhP806JRkdE", + "tags": [] + }, + "outputs": [], + "source": [ + "!pip install \"langtest[langchain,openai,transformers,evaluate]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Harness and Its Parameters\n", + "\n", + "The Harness class is a testing class for Natural Language Processing (NLP) models. It evaluates the performance of a NLP model on a given task using test data and generates a report with test results.Harness can be imported from the LangTest library in the following way." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:18:48.937990Z", + "iopub.status.busy": "2023-08-18T11:18:48.937430Z", + "iopub.status.idle": "2023-08-18T11:18:50.488391Z", + "shell.execute_reply": "2023-08-18T11:18:50.487834Z", + "shell.execute_reply.started": "2023-08-18T11:18:48.937968Z" + }, + "executionInfo": { + "elapsed": 688, + "status": "ok", + "timestamp": 1692355615490, + "user": { + "displayName": "Prikshit sharma", + "userId": "07819241395213139913" + }, + "user_tz": -330 + }, + "id": "CCGUqqIPQPVa", + "tags": [] + }, + "outputs": [], + "source": [ + "from langtest import Harness" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It imports the Harness class from within the module, that is designed to provide a blueprint or framework for conducting NLP testing, and that instances of the Harness class can be customized or configured for different testing scenarios or environments.\n", + "\n", + "Here is a list of the different parameters that can be passed to the Harness function:\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "| Parameter | Description |\n", + "| ------------- | ----------- |\n", + "| **task** | Task for which the model is to be evaluated (text-classification or ner) |\n", + "| **model** | Specifies the model(s) to be evaluated. Can be a dictionary or a list of dictionaries. Each dictionary should contain 'model' and 'hub' keys. If a path is specified, the dictionary must contain 'model' and 'hub' keys. |\n", + "| **data** | The data to be used for evaluation. A dictionary providing flexibility and options for data sources. It should include the following keys:
  • data_source (mandatory): The source of the data.
  • subset (optional): The subset of the data.
  • feature_column (optional): The column containing the features.
  • target_column (optional): The column containing the target labels.
  • split (optional): The data split to be used.
|\n", + "| **config** | Configuration for the tests to be performed, specified in the form of a YAML file. |\n", + "\n", + "\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom Column Names for CSV\n", + "\n", + "The `data` parameter also accepts a dictionary that includes the following attributes:\n", + "\n", + "```python\n", + "{\n", + " \"data_source\": \"\",\n", + " \"feature_column\": \"\",\n", + " \"target_column\": \"\",\n", + "}\n", + "\n", + "```\n", + "
\n", + "\n", + "\n", + "| Key | Description |\n", + "| - | - |\n", + "|**data_source** |Represents the name of the dataset being used.|\n", + "|**feature_column** |Specifies the column that contains the input features.\n", + "|**target_column** |Represents the column that contains the target labels or categories.\n", + "\n", + "\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8MVy0r7lPUFL" + }, + "source": [ + "# Text-Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "execution": { + "iopub.execute_input": "2023-08-18T11:02:32.505355Z", + "iopub.status.busy": "2023-08-18T11:02:32.505141Z", + "iopub.status.idle": "2023-08-18T11:02:33.158193Z", + "shell.execute_reply": "2023-08-18T11:02:33.157610Z", + "shell.execute_reply.started": "2023-08-18T11:02:32.505339Z" + }, + "executionInfo": { + "elapsed": 714, + "status": "ok", + "timestamp": 1692355847558, + "user": { + "displayName": "Prikshit sharma", + "userId": "07819241395213139913" + }, + "user_tz": -330 + }, + "id": "c9F8P11lQd8J", + "outputId": "df6fdceb-a395-4eb4-916c-5af90aea0ab6", + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-08-18 11:02:32-- https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/demo/data/imdb.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 28937856 (28M) [text/plain]\n", + "Saving to: ‘imdb.csv’\n", + "\n", + "imdb.csv 100%[===================>] 27.60M --.-KB/s in 0.08s \n", + "\n", + "2023-08-18 11:02:33 (365 MB/s) - ‘imdb.csv’ saved [28937856/28937856]\n", + "\n" + ] + } + ], + "source": [ + "# Download the imdb.csv dataset\n", + "!wget https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/demo/data/imdb.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setup and Configure Harness" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 920, + "referenced_widgets": [ + "a5af7cb3a3504a27bfbcb8067e22baa1", + "7233d1441540421c9c2843b9205a925c", + "61941e4dd77c4bb3b744017a5bbec9ad", + "568aeab42d2e42c1afff49331259e44c", + "e3f2d97226fc46919f01b4ee0e2c6b0e", + "42e2d470fbd641d5b743641db8c886ef", + "5428a75e2fd24cc08740d8fbe6dbe66f", + "1223e7fe38b04c88b4632e38b340fb9c", + "c037220a858d44e6967f8ab318da5e19", + "0967da2f32004a969cc121045548a48d", + "44d4e553878747369961b61f4f15be80", + "4224a031df984fdc935e24b42334e2ea", + "fe385707c3f74cf8b8a29a05b84ade5c", + "5797e1b2ae2443009a8d3c6046259ba5", + "ac6f0c3c83ba48cd89a1873f44b16585", + "992d3a415cf541c49a4b1f70467850c2", + "6587cde91bea403b9a12091d68834dde", + "c788ae4ea7f84b698bcf3ecf936ffa79", + "a4d8a97c7e194df3b17682c0a7d78578", + "164fd5a9aa564d2ca5a7e70a869cfe5d", + "0a58308a1a604bd0bd172610e023bc6d", + "91f77d5b775448cfb4fd80400b1ce67d", + "c7d68d0d64fb4807b081aa4af533a6ac", + "7a4b23c0d420415a9372588849fc596a", + "342f8a8110a4483f963257c74cf4b182", + "6d418e239b9241f2854dd6b89f8e895a", + "dfb27bc830f042729e0a2ac17f6f28eb", + "eac5c25c2a074d91a1c3d1140784fa42", + "254f9c127d034fefa6fe822135dff282", + "4c6b33e2902a4ae1aff0bdf162397c78", + "241cfd171c5e4ec4b35df40cbf06b8d9", + "feb1ed3bd3cf4b6a8855c47b2827c8ed", + "114ccd8e9c8449738caee4dfaba0b9bf", + "96e35138ac394407893e5a392d3f15c4", + 
"dcdb8dcd06024ae389cf42f9146ec760", + "b034e2c7e67a49729cc2e27172c7e12c", + "516e39a0cf4e4fc09cf09da28f6bee97", + "f4f09ec4d9624deaacdeaeb0efa4ac21", + "cdccebee635e4570b27eec736c601860", + "fd718efc7f8c4af0b93e66d7d3163026", + "048af93849664d89afd4ec54677e2480", + "0e8cd24915dc4c7eac076830d84bf35e", + "362e3880a616453cb8e30c05f5056600", + "5aa9452586654839a9cbb0a2a4f236e2", + "034cffa7b1eb414f968a52554e93d732", + "387f28a7ebad434298f6d3389be2dccb", + "7aef86a276f945a7b59633462dcda0dc", + "534f1061303e45c2af004258b347f563", + "9a8d04a738f54a83842591462144de48", + "8d33ffa462c641dc9d937bd5d0388674", + "e0692e0e66394689acf97322e94313d1", + "fc19730e6db34f46a4e3946320c27b20", + "a1421d1aa0eb4c74ba030a9882a462d0", + "2bc7dd17871a4997a65520b197b8c9b1", + "bad9ccd0bc6b478e9629bd4fbe2ad173", + "da3ac361f4c54564b464f6ecb78e7106", + "266170876ef945d1b120b077c99269f5", + "fbff364c69214d9dab368b822782e7dc", + "3af968c9b0914ee7a0886bd22f0a45df", + "de39f19307ca4a10929f77448be713e0", + "2ca42f17eb804ff9a3930a50261d1a29", + "e7767e9b1b9248208a505fc47c6ac22b", + "27a130390ba549dd87e37ad6a48ae295", + "da5ac6f03ff44bd3ad0240c8362a998b", + "496a9fa614e34ac9a88eedcc0bdddf5c", + "52febd9de1c24a89bc22fff9744da2bd" + ] + }, + "execution": { + "iopub.execute_input": "2023-08-18T11:02:33.159988Z", + "iopub.status.busy": "2023-08-18T11:02:33.159545Z", + "iopub.status.idle": "2023-08-18T11:02:39.879067Z", + "shell.execute_reply": "2023-08-18T11:02:39.878530Z", + "shell.execute_reply.started": "2023-08-18T11:02:33.159969Z" + }, + "executionInfo": { + "elapsed": 30683, + "status": "ok", + "timestamp": 1692356020788, + "user": { + "displayName": "Prikshit sharma", + "userId": "07819241395213139913" + }, + "user_tz": -330 + }, + "id": "273VcBwSPFlj", + "outputId": "a7b68f4e-3695-42bd-a63a-ca97a52c2e03", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-18 11:02:36.886091: I tensorflow/core/platform/cpu_feature_guard.cc:193] This 
TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c7137712d96c4fc7886d7ac21b65f619", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading (…)lve/main/config.json: 0%| | 0.00/735 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginaltest_case
0accuracymin_macro_f1_score-macro
1robustnessadd_typoOne of the other reviewers has mentioned that ...One of the other reviewers has mentioned that ...
2robustnessadd_typoA wonderful little production. <br /><br />The...A wonderful little production. <br /><br />The...
3robustnessadd_typoI thought this was a wonderful way to spend ti...I thought this was a wonderful way to spend ti...
4robustnessadd_typoBasically there's a family where a little boy ...Basically there's a family where s little boy ...
...............
2996biasreplace_to_female_pronounsNothing is sacred. Just ask Ernie Fosselius. T...Nothing is sacred. Just ask Ernie Fosselius. T...
2997biasreplace_to_female_pronounsI hated it. I hate self-aware pretentious inan...I hated it. I hate self-aware pretentious inan...
2998biasreplace_to_female_pronounsI usually try to be professional and construct...I usually try to be professional and construct...
2999biasreplace_to_female_pronounsIf you like me is going to see this in a film ...If you like me is going to see this in a film ...
3000biasreplace_to_female_pronounsThis is like a zoology textbook, given that it...This is like a zoology textbook, given that it...
\n", + "

3001 rows × 4 columns

\n", + "" + ], + "text/plain": [ + " category test_type \\\n", + "0 accuracy min_macro_f1_score \n", + "1 robustness add_typo \n", + "2 robustness add_typo \n", + "3 robustness add_typo \n", + "4 robustness add_typo \n", + "... ... ... \n", + "2996 bias replace_to_female_pronouns \n", + "2997 bias replace_to_female_pronouns \n", + "2998 bias replace_to_female_pronouns \n", + "2999 bias replace_to_female_pronouns \n", + "3000 bias replace_to_female_pronouns \n", + "\n", + " original \\\n", + "0 - \n", + "1 One of the other reviewers has mentioned that ... \n", + "2 A wonderful little production.

The... \n", + "3 I thought this was a wonderful way to spend ti... \n", + "4 Basically there's a family where a little boy ... \n", + "... ... \n", + "2996 Nothing is sacred. Just ask Ernie Fosselius. T... \n", + "2997 I hated it. I hate self-aware pretentious inan... \n", + "2998 I usually try to be professional and construct... \n", + "2999 If you like me is going to see this in a film ... \n", + "3000 This is like a zoology textbook, given that it... \n", + "\n", + " test_case \n", + "0 macro \n", + "1 One of the other reviewers has mentioned that ... \n", + "2 A wonderful little production.

The... \n", + "3 I thought this was a wonderful way to spend ti... \n", + "4 Basically there's a family where s little boy ... \n", + "... ... \n", + "2996 Nothing is sacred. Just ask Ernie Fosselius. T... \n", + "2997 I hated it. I hate self-aware pretentious inan... \n", + "2998 I usually try to be professional and construct... \n", + "2999 If you like me is going to see this in a film ... \n", + "3000 This is like a zoology textbook, given that it... \n", + "\n", + "[3001 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.testcases()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "harness.generate() method automatically generates the test cases (based on the provided configuration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running the tests" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:03:59.539241Z", + "iopub.status.busy": "2023-08-18T11:03:59.539074Z", + "iopub.status.idle": "2023-08-18T11:10:06.810091Z", + "shell.execute_reply": "2023-08-18T11:10:06.809623Z", + "shell.execute_reply.started": "2023-08-18T11:03:59.539226Z" + }, + "id": "tbln0YluPeHd", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running testcases... 
: 0%| | 0/3001 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginaltest_caseexpected_resultactual_resultpass
0accuracymin_macro_f1_score-macro0.70.0False
1robustnessadd_typoOne of the other reviewers has mentioned that ...One of the other reviewers has mentioned that ...POSITIVEPOSITIVETrue
2robustnessadd_typoA wonderful little production. <br /><br />The...A wonderful little production. <br /><br />The...POSITIVEPOSITIVETrue
3robustnessadd_typoI thought this was a wonderful way to spend ti...I thought this was a wonderful way to spend ti...POSITIVEPOSITIVETrue
4robustnessadd_typoBasically there's a family where a little boy ...Basically there's a family where s little boy ...NEGATIVENEGATIVETrue
........................
2996biasreplace_to_female_pronounsNothing is sacred. Just ask Ernie Fosselius. T...Nothing is sacred. Just ask Ernie Fosselius. T...POSITIVEPOSITIVETrue
2997biasreplace_to_female_pronounsI hated it. I hate self-aware pretentious inan...I hated it. I hate self-aware pretentious inan...NEGATIVENEGATIVETrue
2998biasreplace_to_female_pronounsI usually try to be professional and construct...I usually try to be professional and construct...NEGATIVENEGATIVETrue
2999biasreplace_to_female_pronounsIf you like me is going to see this in a film ...If you like me is going to see this in a film ...NEGATIVENEGATIVETrue
3000biasreplace_to_female_pronounsThis is like a zoology textbook, given that it...This is like a zoology textbook, given that it...NEGATIVENEGATIVETrue
\n", + "

3001 rows × 7 columns

\n", + "" + ], + "text/plain": [ + " category test_type \\\n", + "0 accuracy min_macro_f1_score \n", + "1 robustness add_typo \n", + "2 robustness add_typo \n", + "3 robustness add_typo \n", + "4 robustness add_typo \n", + "... ... ... \n", + "2996 bias replace_to_female_pronouns \n", + "2997 bias replace_to_female_pronouns \n", + "2998 bias replace_to_female_pronouns \n", + "2999 bias replace_to_female_pronouns \n", + "3000 bias replace_to_female_pronouns \n", + "\n", + " original \\\n", + "0 - \n", + "1 One of the other reviewers has mentioned that ... \n", + "2 A wonderful little production.

The... \n", + "3 I thought this was a wonderful way to spend ti... \n", + "4 Basically there's a family where a little boy ... \n", + "... ... \n", + "2996 Nothing is sacred. Just ask Ernie Fosselius. T... \n", + "2997 I hated it. I hate self-aware pretentious inan... \n", + "2998 I usually try to be professional and construct... \n", + "2999 If you like me is going to see this in a film ... \n", + "3000 This is like a zoology textbook, given that it... \n", + "\n", + " test_case expected_result \\\n", + "0 macro 0.7 \n", + "1 One of the other reviewers has mentioned that ... POSITIVE \n", + "2 A wonderful little production.

The... POSITIVE \n", + "3 I thought this was a wonderful way to spend ti... POSITIVE \n", + "4 Basically there's a family where s little boy ... NEGATIVE \n", + "... ... ... \n", + "2996 Nothing is sacred. Just ask Ernie Fosselius. T... POSITIVE \n", + "2997 I hated it. I hate self-aware pretentious inan... NEGATIVE \n", + "2998 I usually try to be professional and construct... NEGATIVE \n", + "2999 If you like me is going to see this in a film ... NEGATIVE \n", + "3000 This is like a zoology textbook, given that it... NEGATIVE \n", + "\n", + " actual_result pass \n", + "0 0.0 False \n", + "1 POSITIVE True \n", + "2 POSITIVE True \n", + "3 POSITIVE True \n", + "4 NEGATIVE True \n", + "... ... ... \n", + "2996 POSITIVE True \n", + "2997 NEGATIVE True \n", + "2998 NEGATIVE True \n", + "2999 NEGATIVE True \n", + "3000 NEGATIVE True \n", + "\n", + "[3001 rows x 7 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generated_results()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method returns the generated results in the form of a pandas dataframe, which provides a convenient and easy-to-use format for working with the test results. You can use this method to quickly identify the test cases that failed and to determine where fixes are needed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Results\n", + "\n", + "We can call `.report()` which summarizes the results giving information about pass and fail counts and overall test pass/fail flag." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:11:00.323853Z", + "iopub.status.busy": "2023-08-18T11:11:00.323537Z", + "iopub.status.idle": "2023-08-18T11:11:00.340770Z", + "shell.execute_reply": "2023-08-18T11:11:00.340330Z", + "shell.execute_reply.started": "2023-08-18T11:11:00.323835Z" + }, + "id": "4ecGLbrcPk7i", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typefail_countpass_countpass_rateminimum_pass_ratepass
0accuracymin_macro_f1_score100%50%False
1robustnessadd_typo999199%70%True
2robustnesslowercase01000100%70%True
3biasreplace_to_female_pronouns1999100%70%True
\n", + "
" + ], + "text/plain": [ + " category test_type fail_count pass_count pass_rate \\\n", + "0 accuracy min_macro_f1_score 1 0 0% \n", + "1 robustness add_typo 9 991 99% \n", + "2 robustness lowercase 0 1000 100% \n", + "3 bias replace_to_female_pronouns 1 999 100% \n", + "\n", + " minimum_pass_rate pass \n", + "0 50% False \n", + "1 70% True \n", + "2 70% True \n", + "3 70% True " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.report()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "auvm2U1ASA7L" + }, + "source": [ + "# NER" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:11:03.790751Z", + "iopub.status.busy": "2023-08-18T11:11:03.790157Z", + "iopub.status.idle": "2023-08-18T11:11:04.303895Z", + "shell.execute_reply": "2023-08-18T11:11:04.303295Z", + "shell.execute_reply.started": "2023-08-18T11:11:03.790729Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "--2023-08-18 11:11:03-- https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/tests/fixtures/tner.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 32782 (32K) [text/plain]\n", + "Saving to: ‘tner.csv’\n", + "\n", + "tner.csv 100%[===================>] 32.01K --.-KB/s in 0s \n", + "\n", + "2023-08-18 11:11:04 (91.0 MB/s) - ‘tner.csv’ saved [32782/32782]\n", + "\n" + ] + } + ], + "source": [ + "# Download the tner.csv dataset\n", + "!wget https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/tests/fixtures/tner.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setup and Configure Harness" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:11:06.155916Z", + "iopub.status.busy": "2023-08-18T11:11:06.155406Z", + "iopub.status.idle": "2023-08-18T11:11:08.332918Z", + "shell.execute_reply": "2023-08-18T11:11:08.332454Z", + "shell.execute_reply.started": "2023-08-18T11:11:06.155894Z" + }, + "id": "IJbW_lNCPnuW", + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "74bd25c44c3a47978b8b967fe040c215", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading (…)lve/main/config.json: 0%| | 0.00/829 [00:00
categorytest_typeoriginaltest_case
0robustnessadd_typoThis division also contains the Ventana Wilder...This division also contsins the Ventana Wilder...
1robustnessadd_typo\" So here is the balance NBC has to consider :...\" So here is the balance NBC has to consider :...
2robustnessadd_typoIt is a protest song that \" creates a cinemati...It is a protest song that \" creates a cinemati...
3robustnessadd_typoThis differs from approaches such as IP or Eth...This differs from approaches such as IP or Eyh...
4robustnessadd_typoSince then , only Terry Bradshaw in 147 games ...Since then , only Terry Bradshaw ia 147 games ...
...............
404representationmin_label_representation_count-LOC
405representationmin_label_representation_count-MISC
406representationmin_label_representation_count-ORG
407representationmin_label_representation_count-PER
408representationmin_label_representation_count-O
\n", + "

409 rows × 4 columns

\n", + "" + ], + "text/plain": [ + " category test_type \\\n", + "0 robustness add_typo \n", + "1 robustness add_typo \n", + "2 robustness add_typo \n", + "3 robustness add_typo \n", + "4 robustness add_typo \n", + ".. ... ... \n", + "404 representation min_label_representation_count \n", + "405 representation min_label_representation_count \n", + "406 representation min_label_representation_count \n", + "407 representation min_label_representation_count \n", + "408 representation min_label_representation_count \n", + "\n", + " original \\\n", + "0 This division also contains the Ventana Wilder... \n", + "1 \" So here is the balance NBC has to consider :... \n", + "2 It is a protest song that \" creates a cinemati... \n", + "3 This differs from approaches such as IP or Eth... \n", + "4 Since then , only Terry Bradshaw in 147 games ... \n", + ".. ... \n", + "404 - \n", + "405 - \n", + "406 - \n", + "407 - \n", + "408 - \n", + "\n", + " test_case \n", + "0 This division also contsins the Ventana Wilder... \n", + "1 \" So here is the balance NBC has to consider :... \n", + "2 It is a protest song that \" creates a cinemati... \n", + "3 This differs from approaches such as IP or Eyh... \n", + "4 Since then , only Terry Bradshaw ia 147 games ... \n", + ".. ... 
\n", + "404 LOC \n", + "405 MISC \n", + "406 ORG \n", + "407 PER \n", + "408 O \n", + "\n", + "[409 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.testcases()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running the tests" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:11:14.404446Z", + "iopub.status.busy": "2023-08-18T11:11:14.404306Z", + "iopub.status.idle": "2023-08-18T11:11:44.012577Z", + "shell.execute_reply": "2023-08-18T11:11:44.012101Z", + "shell.execute_reply.started": "2023-08-18T11:11:14.404433Z" + }, + "id": "kWIw3EZZP0Q0", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running testcases... : 0%| | 0/409 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginaltest_caseexpected_resultactual_resultpass
0robustnessadd_typoThis division also contains the Ventana Wilder...This division also contsins the Ventana Wilder...Ventana Wilderness: LOC, California: LOCVentana Wilderness: LOC, California: LOCTrue
1robustnessadd_typo\" So here is the balance NBC has to consider :...\" So here is the balance NBC has to consider :...NBC: ORG, The Who: MISC, Animal Practice: MISCNBC: ORG, Xho: MISC, Animal Practice: MISCFalse
2robustnessadd_typoIt is a protest song that \" creates a cinemati...It is a protest song that \" creates a cinemati...America: LOCAmerica: LOCTrue
3robustnessadd_typoThis differs from approaches such as IP or Eth...This differs from approaches such as IP or Eyh...IP: MISC, Ethernet: MISCIP: MISC, Eyhernet: MISCTrue
4robustnessadd_typoSince then , only Terry Bradshaw in 147 games ...Since then , only Terry Bradshaw ia 147 games ...Terry Bradshaw: PER, Joe Montana: PER, Tom Bra...Terry Bradshaw: PER, Joe Montana: PER, Tom Bra...True
........................
404representationmin_label_representation_count-LOC50.0128.0True
405representationmin_label_representation_count-MISC50.080.0True
406representationmin_label_representation_count-ORG50.043.0False
407representationmin_label_representation_count-PER50.051.0True
408representationmin_label_representation_count-O50.02001.0True
\n", + "

409 rows × 7 columns

\n", + "" + ], + "text/plain": [ + " category test_type \\\n", + "0 robustness add_typo \n", + "1 robustness add_typo \n", + "2 robustness add_typo \n", + "3 robustness add_typo \n", + "4 robustness add_typo \n", + ".. ... ... \n", + "404 representation min_label_representation_count \n", + "405 representation min_label_representation_count \n", + "406 representation min_label_representation_count \n", + "407 representation min_label_representation_count \n", + "408 representation min_label_representation_count \n", + "\n", + " original \\\n", + "0 This division also contains the Ventana Wilder... \n", + "1 \" So here is the balance NBC has to consider :... \n", + "2 It is a protest song that \" creates a cinemati... \n", + "3 This differs from approaches such as IP or Eth... \n", + "4 Since then , only Terry Bradshaw in 147 games ... \n", + ".. ... \n", + "404 - \n", + "405 - \n", + "406 - \n", + "407 - \n", + "408 - \n", + "\n", + " test_case \\\n", + "0 This division also contsins the Ventana Wilder... \n", + "1 \" So here is the balance NBC has to consider :... \n", + "2 It is a protest song that \" creates a cinemati... \n", + "3 This differs from approaches such as IP or Eyh... \n", + "4 Since then , only Terry Bradshaw ia 147 games ... \n", + ".. ... \n", + "404 LOC \n", + "405 MISC \n", + "406 ORG \n", + "407 PER \n", + "408 O \n", + "\n", + " expected_result \\\n", + "0 Ventana Wilderness: LOC, California: LOC \n", + "1 NBC: ORG, The Who: MISC, Animal Practice: MISC \n", + "2 America: LOC \n", + "3 IP: MISC, Ethernet: MISC \n", + "4 Terry Bradshaw: PER, Joe Montana: PER, Tom Bra... \n", + ".. ... \n", + "404 50.0 \n", + "405 50.0 \n", + "406 50.0 \n", + "407 50.0 \n", + "408 50.0 \n", + "\n", + " actual_result pass \n", + "0 Ventana Wilderness: LOC, California: LOC True \n", + "1 NBC: ORG, Xho: MISC, Animal Practice: MISC False \n", + "2 America: LOC True \n", + "3 IP: MISC, Eyhernet: MISC True \n", + "4 Terry Bradshaw: PER, Joe Montana: PER, Tom Bra... 
True \n", + ".. ... ... \n", + "404 128.0 True \n", + "405 80.0 True \n", + "406 43.0 False \n", + "407 51.0 True \n", + "408 2001.0 True \n", + "\n", + "[409 rows x 7 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generated_results()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Results\n", + "\n", + "We can call `.report()` which summarizes the results giving information about pass and fail counts and overall test pass/fail flag." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:12:13.937013Z", + "iopub.status.busy": "2023-08-18T11:12:13.936529Z", + "iopub.status.idle": "2023-08-18T11:12:13.994789Z", + "shell.execute_reply": "2023-08-18T11:12:13.994298Z", + "shell.execute_reply.started": "2023-08-18T11:12:13.936991Z" + }, + "id": "lUi5JvGRP04h", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typefail_countpass_countpass_rateminimum_pass_ratepass
0robustnessadd_typo109090%70%True
1robustnessamerican_to_british29898%70%True
2accuracymin_micro_f1_score01100%100%True
3biasreplace_to_female_pronouns0100100%70%True
4biasreplace_to_low_income_country29898%70%True
5fairnessmin_gender_f1_score03100%100%True
6representationmin_label_representation_count1480%100%False
\n", + "
" + ], + "text/plain": [ + " category test_type fail_count pass_count \\\n", + "0 robustness add_typo 10 90 \n", + "1 robustness american_to_british 2 98 \n", + "2 accuracy min_micro_f1_score 0 1 \n", + "3 bias replace_to_female_pronouns 0 100 \n", + "4 bias replace_to_low_income_country 2 98 \n", + "5 fairness min_gender_f1_score 0 3 \n", + "6 representation min_label_representation_count 1 4 \n", + "\n", + " pass_rate minimum_pass_rate pass \n", + "0 90% 70% True \n", + "1 98% 70% True \n", + "2 100% 100% True \n", + "3 100% 70% True \n", + "4 98% 70% True \n", + "5 100% 100% True \n", + "6 80% 100% False " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.report()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Question-Answering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### OpenAI Model Testing For Question Answering\n", + "\n", + "In this section, we dive into testing of OpenAI models in Question Answering task.\n", + "\n", + "LangTest supports robustness tests for LLM testing for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import openai\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data : [Question Answering Dataset](https://www.kaggle.com/datasets/ananthu017/squad-csv-format)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setup and Configure Harness" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:00:38.961075Z", + "iopub.status.busy": "2023-08-18T12:00:38.960622Z", + "iopub.status.idle": "2023-08-18T12:00:45.628983Z", + "shell.execute_reply": "2023-08-18T12:00:45.628453Z", + "shell.execute_reply.started": "2023-08-18T12:00:38.961054Z" + }, + "id": "qQMe9WUoPxeV", + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Configuration : \n", + " {\n", + " \"model_parameters\": {\n", + " \"temperature\": 0.2,\n", + " \"max_tokens\": 64\n", + " },\n", + " \"tests\": {\n", + " \"defaults\": {\n", + " \"min_pass_rate\": 1.0\n", + " },\n", + " \"robustness\": {\n", + " \"add_typo\": {\n", + " \"min_pass_rate\": 0.7\n", + " },\n", + " \"lowercase\": {\n", + " \"min_pass_rate\": 0.7\n", + " }\n", + " }\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "harness = Harness(task=\"question-answering\",\n", + " model={\"model\":\"text-davinci-003\",\"hub\":\"openai\"},\n", + " data={\"data_source\":\"SQuAD_csv.csv\",\n", + " \"feature_column\":{\"passage\": \"context\", \"question\": \"question\"},\n", + " \"target_column\":'answer_start',\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:00:45.630249Z", + "iopub.status.busy": "2023-08-18T12:00:45.629906Z", + "iopub.status.idle": "2023-08-18T12:00:45.633945Z", + 
"shell.execute_reply": "2023-08-18T12:00:45.633511Z", + "shell.execute_reply.started": "2023-08-18T12:00:45.630232Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tests': {'defaults': {'min_pass_rate': 0.5},\n", + " 'robustness': {'add_typo': {'min_pass_rate': 0.7},\n", + " 'lowercase': {'min_pass_rate': 0.7}}}}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.configure({\n", + " \"tests\":{\n", + " \"defaults\":{\"min_pass_rate\":0.5},\n", + " \"robustness\":{\n", + " \"add_typo\":{\"min_pass_rate\":0.7},\n", + " \"lowercase\":{\"min_pass_rate\":0.7},\n", + " }\n", + " }\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:00:45.634757Z", + "iopub.status.busy": "2023-08-18T12:00:45.634598Z", + "iopub.status.idle": "2023-08-18T12:00:45.740123Z", + "shell.execute_reply": "2023-08-18T12:00:45.739660Z", + "shell.execute_reply.started": "2023-08-18T12:00:45.634742Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "harness.data=harness.data[:20]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:24:17.271723Z", + "iopub.status.busy": "2023-08-18T12:24:17.271286Z", + "iopub.status.idle": "2023-08-18T12:24:17.274167Z", + "shell.execute_reply": "2023-08-18T12:24:17.273722Z", + "shell.execute_reply.started": "2023-08-18T12:24:17.271702Z" + }, + "tags": [] + }, + "source": [ + "### Generating the test cases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:00:45.741371Z", + "iopub.status.busy": "2023-08-18T12:00:45.741065Z", + "iopub.status.idle": "2023-08-18T12:00:45.819530Z", + "shell.execute_reply": "2023-08-18T12:00:45.819079Z", + "shell.execute_reply.started": "2023-08-18T12:00:45.741355Z" + }, + "id": "9iiNFfcJP8Zt", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 11335.96it/s]\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generate()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:00:45.820370Z", + "iopub.status.busy": "2023-08-18T12:00:45.820067Z", + "iopub.status.idle": "2023-08-18T12:00:45.887843Z", + "shell.execute_reply": "2023-08-18T12:00:45.887432Z", + "shell.execute_reply.started": "2023-08-18T12:00:45.820355Z" + }, + "id": "EuRlQzS7P8x8", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginal_contextoriginal_questionperturbed_contextperturbed_question
0robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start becoming popular?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start brcoming popular?
1robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in when she was...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in whrn she was...
2robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce leave Destiny's Child and bec...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beypnce leave Destiny's Child and bec...
3robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Beyonce grow up?Beyoncé Giselle Knpwles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Feyonce grow up?
4robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In which decade did Beyonce become famous?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In wmich decade did Beyonce become famous?
5robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what R&B group was she the lead singer?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Ln what R&B group was she the lead singer?
6robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What album made her a worldwide known artist?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Whay album made her a worldwide known artist?
7robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Child group?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Fhild group?
8robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé rise to fame?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Wnen did Beyoncé rise to fame?
9robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destiny's Child?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destinp's Child?
10robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...
11robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé release Dangerously in Love?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When wid Beyoncé release Dangerously in Love?
12robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beyoncé win for her...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beypncé win for her...
13robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Child?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Chuld?
14robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name of Beyoncé's first solo album?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name pf Beyoncé's first solo album?
15robustnessadd_typoFollowing the disbandment of Destiny's Child i...After her second solo album, what other entert...Following the disbandment of Destiny's Child i...After her second solo album, uhat other entert...
16robustnessadd_typoFollowing the disbandment of Destiny's Child i...Which artist did Beyonce marry?Following the disbandment of Destiny's Child i...Which artist did Neyonce marry?
17robustnessadd_typoFollowing the disbandment of Destiny's Child i...To set the record for Grammys, how many did Be...Following the disbandment of Destiny's Child i...To wet the record for Grammys, how many did Be...
18robustnessadd_typoFollowing the disbandment of Destiny's Child i...For what movie did Beyonce receive her first ...Following the disbandment of Destiny's Child i...For what movie fid Beyonce receive her first ...
19robustnessadd_typoFollowing the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...Following the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...
20robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start becoming popular?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyonce start becoming popular?
21robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in when she was...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what areas did beyonce compete in when she was...
22robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce leave Destiny's Child and bec...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyonce leave destiny's child and bec...
23robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Beyonce grow up?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in what city and state did beyonce grow up?
24robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In which decade did Beyonce become famous?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in which decade did beyonce become famous?
25robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what R&B group was she the lead singer?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in what r&b group was she the lead singer?
26robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What album made her a worldwide known artist?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what album made her a worldwide known artist?
27robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Child group?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...who managed the destiny's child group?
28robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé rise to fame?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyoncé rise to fame?
29robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destiny's Child?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what role did beyoncé have in destiny's child?
30robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was the first album beyoncé released as a...
31robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé release Dangerously in Love?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyoncé release dangerously in love?
32robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beyoncé win for her...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...how many grammy awards did beyoncé win for her...
33robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Child?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was beyoncé's role in destiny's child?
34robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name of Beyoncé's first solo album?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was the name of beyoncé's first solo album?
35robustnesslowercaseFollowing the disbandment of Destiny's Child i...After her second solo album, what other entert...following the disbandment of destiny's child i...after her second solo album, what other entert...
36robustnesslowercaseFollowing the disbandment of Destiny's Child i...Which artist did Beyonce marry?following the disbandment of destiny's child i...which artist did beyonce marry?
37robustnesslowercaseFollowing the disbandment of Destiny's Child i...To set the record for Grammys, how many did Be...following the disbandment of destiny's child i...to set the record for grammys, how many did be...
38robustnesslowercaseFollowing the disbandment of Destiny's Child i...For what movie did Beyonce receive her first ...following the disbandment of destiny's child i...for what movie did beyonce receive her first g...
39robustnesslowercaseFollowing the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...following the disbandment of destiny's child i...when did beyonce take a hiatus in her career a...
\n", + "
" + ], + "text/plain": [ + " category test_type original_context \\\n", + "0 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "1 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "2 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "3 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "4 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "5 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "6 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "7 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "8 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "9 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "10 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "11 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "12 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "13 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "14 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "15 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "16 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "17 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "18 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "19 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "20 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "21 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "22 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... 
\n", + "23 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "24 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "25 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "26 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "27 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "28 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "29 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "30 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "31 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "32 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "33 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "34 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "35 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "36 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "37 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "38 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "39 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "\n", + " original_question \\\n", + "0 When did Beyonce start becoming popular? \n", + "1 What areas did Beyonce compete in when she was... \n", + "2 When did Beyonce leave Destiny's Child and bec... \n", + "3 In what city and state did Beyonce grow up? \n", + "4 In which decade did Beyonce become famous? \n", + "5 In what R&B group was she the lead singer? \n", + "6 What album made her a worldwide known artist? \n", + "7 Who managed the Destiny's Child group? \n", + "8 When did Beyoncé rise to fame? \n", + "9 What role did Beyoncé have in Destiny's Child? 
\n", + "10 What was the first album Beyoncé released as a... \n", + "11 When did Beyoncé release Dangerously in Love? \n", + "12 How many Grammy awards did Beyoncé win for her... \n", + "13 What was Beyoncé's role in Destiny's Child? \n", + "14 What was the name of Beyoncé's first solo album? \n", + "15 After her second solo album, what other entert... \n", + "16 Which artist did Beyonce marry? \n", + "17 To set the record for Grammys, how many did Be... \n", + "18 For what movie did Beyonce receive her first ... \n", + "19 When did Beyonce take a hiatus in her career a... \n", + "20 When did Beyonce start becoming popular? \n", + "21 What areas did Beyonce compete in when she was... \n", + "22 When did Beyonce leave Destiny's Child and bec... \n", + "23 In what city and state did Beyonce grow up? \n", + "24 In which decade did Beyonce become famous? \n", + "25 In what R&B group was she the lead singer? \n", + "26 What album made her a worldwide known artist? \n", + "27 Who managed the Destiny's Child group? \n", + "28 When did Beyoncé rise to fame? \n", + "29 What role did Beyoncé have in Destiny's Child? \n", + "30 What was the first album Beyoncé released as a... \n", + "31 When did Beyoncé release Dangerously in Love? \n", + "32 How many Grammy awards did Beyoncé win for her... \n", + "33 What was Beyoncé's role in Destiny's Child? \n", + "34 What was the name of Beyoncé's first solo album? \n", + "35 After her second solo album, what other entert... \n", + "36 Which artist did Beyonce marry? \n", + "37 To set the record for Grammys, how many did Be... \n", + "38 For what movie did Beyonce receive her first ... \n", + "39 When did Beyonce take a hiatus in her career a... \n", + "\n", + " perturbed_context \\\n", + "0 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "1 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "2 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "3 Beyoncé Giselle Knpwles-Carter (/biːˈjɒnseɪ/ b... 
\n", + "4 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "5 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "6 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "7 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "8 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "9 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "10 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "11 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "12 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "13 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "14 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "15 Following the disbandment of Destiny's Child i... \n", + "16 Following the disbandment of Destiny's Child i... \n", + "17 Following the disbandment of Destiny's Child i... \n", + "18 Following the disbandment of Destiny's Child i... \n", + "19 Following the disbandment of Destiny's Child i... \n", + "20 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "21 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "22 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "23 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "24 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "25 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "26 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "27 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "28 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "29 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "30 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "31 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "32 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "33 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "34 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "35 following the disbandment of destiny's child i... 
\n", + "36 following the disbandment of destiny's child i... \n", + "37 following the disbandment of destiny's child i... \n", + "38 following the disbandment of destiny's child i... \n", + "39 following the disbandment of destiny's child i... \n", + "\n", + " perturbed_question \n", + "0 When did Beyonce start brcoming popular? \n", + "1 What areas did Beyonce compete in whrn she was... \n", + "2 When did Beypnce leave Destiny's Child and bec... \n", + "3 In what city and state did Feyonce grow up? \n", + "4 In wmich decade did Beyonce become famous? \n", + "5 Ln what R&B group was she the lead singer? \n", + "6 Whay album made her a worldwide known artist? \n", + "7 Who managed the Destiny's Fhild group? \n", + "8 Wnen did Beyoncé rise to fame? \n", + "9 What role did Beyoncé have in Destinp's Child? \n", + "10 What was the first album Beyoncé released as a... \n", + "11 When wid Beyoncé release Dangerously in Love? \n", + "12 How many Grammy awards did Beypncé win for her... \n", + "13 What was Beyoncé's role in Destiny's Chuld? \n", + "14 What was the name pf Beyoncé's first solo album? \n", + "15 After her second solo album, uhat other entert... \n", + "16 Which artist did Neyonce marry? \n", + "17 To wet the record for Grammys, how many did Be... \n", + "18 For what movie fid Beyonce receive her first ... \n", + "19 When did Beyonce take a hiatus in her career a... \n", + "20 when did beyonce start becoming popular? \n", + "21 what areas did beyonce compete in when she was... \n", + "22 when did beyonce leave destiny's child and bec... \n", + "23 in what city and state did beyonce grow up? \n", + "24 in which decade did beyonce become famous? \n", + "25 in what r&b group was she the lead singer? \n", + "26 what album made her a worldwide known artist? \n", + "27 who managed the destiny's child group? \n", + "28 when did beyoncé rise to fame? \n", + "29 what role did beyoncé have in destiny's child? \n", + "30 what was the first album beyoncé released as a... 
 \n",
+       "31        when did beyoncé release dangerously in love?    \n",
+       "32    how many grammy awards did beyoncé win for her...    \n",
+       "33          what was beyoncé's role in destiny's child?    \n",
+       "34    what was the name of beyoncé's first solo album?    \n",
+       "35    after her second solo album, what other entert...    \n",
+       "36                      which artist did beyonce marry?    \n",
+       "37    to set the record for grammys, how many did be...    \n",
+       "38    for what movie did beyonce receive her first g...    \n",
+       "39    when did beyonce take a hiatus in her career a...    "
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "harness.testcases()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "harness.generate() method automatically generates the test cases (based on the provided configuration)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Running the tests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-18T12:00:49.796459Z",
+     "iopub.status.busy": "2023-08-18T12:00:49.795980Z",
+     "iopub.status.idle": "2023-08-18T12:01:43.917952Z",
+     "shell.execute_reply": "2023-08-18T12:01:43.917472Z",
+     "shell.execute_reply.started": "2023-08-18T12:00:49.796439Z"
+    },
+    "id": "zaZXpq4YP9FO",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Running testcases... : 100%|██████████| 40/40 [00:54<00:00,  1.35s/it]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "harness.run()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Called after harness.generate() and is used to run all the tests. Returns a pass/fail flag for each test."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generated Results" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:01:43.919012Z", + "iopub.status.busy": "2023-08-18T12:01:43.918571Z", + "iopub.status.idle": "2023-08-18T12:01:55.900598Z", + "shell.execute_reply": "2023-08-18T12:01:55.900131Z", + "shell.execute_reply.started": "2023-08-18T12:01:43.918996Z" + }, + "id": "DQ5tpuR5P9Y7", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginal_contextoriginal_questionperturbed_contextperturbed_questionexpected_resultactual_resultpass
0robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start becoming popular?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start brcoming popular?Beyoncé rose to fame in the late 1990s as the...Beyoncé rose to fame in the late 1990s as lea...True
1robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in when she was...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in whrn she was...Beyoncé competed in various singing and danci...Beyoncé competed in various singing and danci...True
2robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce leave Destiny's Child and bec...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beypnce leave Destiny's Child and bec...Beyoncé left Destiny's Child in 2003 and rele...Beyoncé left Destiny's Child in 2003 and rele...True
3robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Beyonce grow up?Beyoncé Giselle Knpwles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Feyonce grow up?Beyoncé grew up in Houston, Texas.Beyoncé grew up in Houston, Texas.True
4robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In which decade did Beyonce become famous?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In wmich decade did Beyonce become famous?Beyoncé became famous in the late 1990s.Beyoncé became famous in the late 1990s.True
5robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what R&B group was she the lead singer?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Ln what R&B group was she the lead singer?Beyoncé was the lead singer of the R&B girl-g...Beyoncé was the lead singer of the R&B girl-g...True
6robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What album made her a worldwide known artist?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Whay album made her a worldwide known artist?Beyoncé's debut album, Dangerously in Love (2...Beyoncé's debut album, Dangerously in Love (2...True
7robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Child group?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Fhild group?Mathew Knowles managed the Destiny's Child gr...Mathew Knowles managed the Destiny's Child gr...True
8robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé rise to fame?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Wnen did Beyoncé rise to fame?Beyoncé rose to fame in the late 1990s as the...Beyoncé rose to fame in the late 1990s as the...True
9robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destiny's Child?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destinp's Child?Beyoncé was the lead singer of the R&B girl-g...Beyoncé was the lead singer of Destiny's Child.True
10robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...Beyoncé's first album as a solo artist was Da...Beyoncé's first album as a solo artist was Da...True
11robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé release Dangerously in Love?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When wid Beyoncé release Dangerously in Love?Beyoncé released Dangerously in Love in 2003.Beyoncé released her debut album, Dangerously...True
12robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beyoncé win for her...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beypncé win for her...Beyoncé won five Grammy Awards for her first ...Beyoncé won five Grammy Awards for her first ...True
13robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Child?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Chuld?Beyoncé was the lead singer of the R&B girl-g...Beyoncé was the lead singer of the R&B girl-g...True
14robustnessadd_typoBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name of Beyoncé's first solo album?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name pf Beyoncé's first solo album?Beyoncé's first solo album was titled Dangero...Beyoncé's first solo album was titled Dangero...True
15robustnessadd_typoFollowing the disbandment of Destiny's Child i...After her second solo album, what other entert...Following the disbandment of Destiny's Child i...After her second solo album, uhat other entert...After her second solo album, Beyoncé ventured...After her second solo album, Beyoncé ventured...True
16robustnessadd_typoFollowing the disbandment of Destiny's Child i...Which artist did Beyonce marry?Following the disbandment of Destiny's Child i...Which artist did Neyonce marry?Beyoncé married rapper Jay Z in 2008.Beyoncé married rapper Jay Z.True
17robustnessadd_typoFollowing the disbandment of Destiny's Child i...To set the record for Grammys, how many did Be...Following the disbandment of Destiny's Child i...To wet the record for Grammys, how many did Be...Beyoncé won six Grammy Awards in 2010, settin...Beyoncé won six Grammy Awards in 2010.True
18robustnessadd_typoFollowing the disbandment of Destiny's Child i...For what movie did Beyonce receive her first ...Following the disbandment of Destiny's Child i...For what movie fid Beyonce receive her first ...Beyoncé received her first Golden Globe nomin...Beyoncé received her first Golden Globe nomin...True
19robustnessadd_typoFollowing the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...Following the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...Beyoncé took a hiatus from music in 2010 and ...Beyoncé took a hiatus from music in 2010 and ...True
20robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce start becoming popular?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyonce start becoming popular?Beyoncé rose to fame in the late 1990s as the...Beyoncé rose to fame in the late 1990s as the...True
21robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What areas did Beyonce compete in when she was...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what areas did beyonce compete in when she was...Beyoncé competed in various singing and danci...Beyoncé competed in various singing and danci...True
22robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyonce leave Destiny's Child and bec...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyonce leave destiny's child and bec...Beyoncé left Destiny's Child in 2003 and rele...Beyoncé left Destiny's Child in 2003 and rele...True
23robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what city and state did Beyonce grow up?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in what city and state did beyonce grow up?Beyoncé grew up in Houston, Texas.Beyoncé grew up in Houston, Texas.True
24robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In which decade did Beyonce become famous?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in which decade did beyonce become famous?Beyoncé rose to fame in the late 1990s.Beyoncé became famous in the late 1990s.True
25robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...In what R&B group was she the lead singer?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...in what r&b group was she the lead singer?Beyoncé was the lead singer of the R&B girl-g...Beyoncé was the lead singer of the R&B girl-g...True
26robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What album made her a worldwide known artist?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what album made her a worldwide known artist?Beyoncé's debut album, Dangerously in Love (2...Beyoncé's debut album, Dangerously in Love (2...True
27robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...Who managed the Destiny's Child group?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...who managed the destiny's child group?Mathew Knowles managed the Destiny's Child gr...Mathew Knowles managed the Destiny's Child gr...True
28robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé rise to fame?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyoncé rise to fame?Beyoncé rose to fame in the late 1990s as the...Beyoncé rose to fame in the late 1990s as lea...True
29robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What role did Beyoncé have in Destiny's Child?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what role did beyoncé have in destiny's child?Beyoncé was the lead singer of the R&B girl g...Beyoncé was the lead singer of the R&B girl-g...True
30robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the first album Beyoncé released as a...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was the first album beyoncé released as a...Beyoncé's first album as a solo artist was Da...Beyoncé's first album as a solo artist was \"D...True
31robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...When did Beyoncé release Dangerously in Love?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...when did beyoncé release dangerously in love?Beyoncé released her debut album, Dangerously...Beyoncé released her debut album, Dangerously...True
32robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...How many Grammy awards did Beyoncé win for her...beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...how many grammy awards did beyoncé win for her...Beyoncé won five Grammy Awards for her first ...Beyoncé won five Grammy Awards for her first ...True
33robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was Beyoncé's role in Destiny's Child?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was beyoncé's role in destiny's child?Beyoncé was the lead singer of the R&B girl-g...Beyoncé was the lead singer of the R&B girl-g...True
34robustnesslowercaseBeyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...What was the name of Beyoncé's first solo album?beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b...what was the name of beyoncé's first solo album?Beyoncé's first solo album was titled Dangero...Beyoncé's first solo album was titled \"Danger...True
35robustnesslowercaseFollowing the disbandment of Destiny's Child i...After her second solo album, what other entert...following the disbandment of destiny's child i...after her second solo album, what other entert...After her second solo album, Beyoncé ventured...After her second solo album, Beyoncé ventured...True
36robustnesslowercaseFollowing the disbandment of Destiny's Child i...Which artist did Beyonce marry?following the disbandment of destiny's child i...which artist did beyonce marry?Beyoncé married rapper Jay Z.Beyoncé married rapper Jay Z.True
37robustnesslowercaseFollowing the disbandment of Destiny's Child i...To set the record for Grammys, how many did Be...following the disbandment of destiny's child i...to set the record for grammys, how many did be...Beyoncé won six Grammy Awards in 2010, settin...Beyoncé won six Grammy Awards in 2010, settin...True
38robustnesslowercaseFollowing the disbandment of Destiny's Child i...For what movie did Beyonce receive her first ...following the disbandment of destiny's child i...for what movie did beyonce receive her first g...Beyoncé received her first Golden Globe nomin...Beyoncé received her first Golden Globe nomin...True
39robustnesslowercaseFollowing the disbandment of Destiny's Child i...When did Beyonce take a hiatus in her career a...following the disbandment of destiny's child i...when did beyonce take a hiatus in her career a...Beyoncé took a hiatus from music in 2010 and ...Beyoncé took a hiatus from music in 2010 and ...True
\n", + "
" + ], + "text/plain": [ + " category test_type original_context \\\n", + "0 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "1 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "2 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "3 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "4 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "5 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "6 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "7 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "8 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "9 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "10 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "11 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "12 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "13 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "14 robustness add_typo Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "15 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "16 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "17 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "18 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "19 robustness add_typo Following the disbandment of Destiny's Child i... \n", + "20 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "21 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "22 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... 
\n", + "23 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "24 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "25 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "26 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "27 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "28 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "29 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "30 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "31 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "32 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "33 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "34 robustness lowercase Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "35 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "36 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "37 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "38 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "39 robustness lowercase Following the disbandment of Destiny's Child i... \n", + "\n", + " original_question \\\n", + "0 When did Beyonce start becoming popular? \n", + "1 What areas did Beyonce compete in when she was... \n", + "2 When did Beyonce leave Destiny's Child and bec... \n", + "3 In what city and state did Beyonce grow up? \n", + "4 In which decade did Beyonce become famous? \n", + "5 In what R&B group was she the lead singer? \n", + "6 What album made her a worldwide known artist? \n", + "7 Who managed the Destiny's Child group? \n", + "8 When did Beyoncé rise to fame? \n", + "9 What role did Beyoncé have in Destiny's Child? 
\n", + "10 What was the first album Beyoncé released as a... \n", + "11 When did Beyoncé release Dangerously in Love? \n", + "12 How many Grammy awards did Beyoncé win for her... \n", + "13 What was Beyoncé's role in Destiny's Child? \n", + "14 What was the name of Beyoncé's first solo album? \n", + "15 After her second solo album, what other entert... \n", + "16 Which artist did Beyonce marry? \n", + "17 To set the record for Grammys, how many did Be... \n", + "18 For what movie did Beyonce receive her first ... \n", + "19 When did Beyonce take a hiatus in her career a... \n", + "20 When did Beyonce start becoming popular? \n", + "21 What areas did Beyonce compete in when she was... \n", + "22 When did Beyonce leave Destiny's Child and bec... \n", + "23 In what city and state did Beyonce grow up? \n", + "24 In which decade did Beyonce become famous? \n", + "25 In what R&B group was she the lead singer? \n", + "26 What album made her a worldwide known artist? \n", + "27 Who managed the Destiny's Child group? \n", + "28 When did Beyoncé rise to fame? \n", + "29 What role did Beyoncé have in Destiny's Child? \n", + "30 What was the first album Beyoncé released as a... \n", + "31 When did Beyoncé release Dangerously in Love? \n", + "32 How many Grammy awards did Beyoncé win for her... \n", + "33 What was Beyoncé's role in Destiny's Child? \n", + "34 What was the name of Beyoncé's first solo album? \n", + "35 After her second solo album, what other entert... \n", + "36 Which artist did Beyonce marry? \n", + "37 To set the record for Grammys, how many did Be... \n", + "38 For what movie did Beyonce receive her first ... \n", + "39 When did Beyonce take a hiatus in her career a... \n", + "\n", + " perturbed_context \\\n", + "0 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "1 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "2 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "3 Beyoncé Giselle Knpwles-Carter (/biːˈjɒnseɪ/ b... 
\n", + "4 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "5 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "6 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "7 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "8 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "9 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "10 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "11 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "12 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "13 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "14 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... \n", + "15 Following the disbandment of Destiny's Child i... \n", + "16 Following the disbandment of Destiny's Child i... \n", + "17 Following the disbandment of Destiny's Child i... \n", + "18 Following the disbandment of Destiny's Child i... \n", + "19 Following the disbandment of Destiny's Child i... \n", + "20 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "21 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "22 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "23 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "24 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "25 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "26 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "27 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "28 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "29 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "30 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "31 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "32 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "33 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "34 beyoncé giselle knowles-carter (/biːˈjɒnseɪ/ b... \n", + "35 following the disbandment of destiny's child i... 
\n", + "36 following the disbandment of destiny's child i... \n", + "37 following the disbandment of destiny's child i... \n", + "38 following the disbandment of destiny's child i... \n", + "39 following the disbandment of destiny's child i... \n", + "\n", + " perturbed_question \\\n", + "0 When did Beyonce start brcoming popular? \n", + "1 What areas did Beyonce compete in whrn she was... \n", + "2 When did Beypnce leave Destiny's Child and bec... \n", + "3 In what city and state did Feyonce grow up? \n", + "4 In wmich decade did Beyonce become famous? \n", + "5 Ln what R&B group was she the lead singer? \n", + "6 Whay album made her a worldwide known artist? \n", + "7 Who managed the Destiny's Fhild group? \n", + "8 Wnen did Beyoncé rise to fame? \n", + "9 What role did Beyoncé have in Destinp's Child? \n", + "10 What was the first album Beyoncé released as a... \n", + "11 When wid Beyoncé release Dangerously in Love? \n", + "12 How many Grammy awards did Beypncé win for her... \n", + "13 What was Beyoncé's role in Destiny's Chuld? \n", + "14 What was the name pf Beyoncé's first solo album? \n", + "15 After her second solo album, uhat other entert... \n", + "16 Which artist did Neyonce marry? \n", + "17 To wet the record for Grammys, how many did Be... \n", + "18 For what movie fid Beyonce receive her first ... \n", + "19 When did Beyonce take a hiatus in her career a... \n", + "20 when did beyonce start becoming popular? \n", + "21 what areas did beyonce compete in when she was... \n", + "22 when did beyonce leave destiny's child and bec... \n", + "23 in what city and state did beyonce grow up? \n", + "24 in which decade did beyonce become famous? \n", + "25 in what r&b group was she the lead singer? \n", + "26 what album made her a worldwide known artist? \n", + "27 who managed the destiny's child group? \n", + "28 when did beyoncé rise to fame? \n", + "29 what role did beyoncé have in destiny's child? 
\n", + "30 what was the first album beyoncé released as a... \n", + "31 when did beyoncé release dangerously in love? \n", + "32 how many grammy awards did beyoncé win for her... \n", + "33 what was beyoncé's role in destiny's child? \n", + "34 what was the name of beyoncé's first solo album? \n", + "35 after her second solo album, what other entert... \n", + "36 which artist did beyonce marry? \n", + "37 to set the record for grammys, how many did be... \n", + "38 for what movie did beyonce receive her first g... \n", + "39 when did beyonce take a hiatus in her career a... \n", + "\n", + " expected_result \\\n", + "0 Beyoncé rose to fame in the late 1990s as the... \n", + "1 Beyoncé competed in various singing and danci... \n", + "2 Beyoncé left Destiny's Child in 2003 and rele... \n", + "3 Beyoncé grew up in Houston, Texas. \n", + "4 Beyoncé became famous in the late 1990s. \n", + "5 Beyoncé was the lead singer of the R&B girl-g... \n", + "6 Beyoncé's debut album, Dangerously in Love (2... \n", + "7 Mathew Knowles managed the Destiny's Child gr... \n", + "8 Beyoncé rose to fame in the late 1990s as the... \n", + "9 Beyoncé was the lead singer of the R&B girl-g... \n", + "10 Beyoncé's first album as a solo artist was Da... \n", + "11 Beyoncé released Dangerously in Love in 2003. \n", + "12 Beyoncé won five Grammy Awards for her first ... \n", + "13 Beyoncé was the lead singer of the R&B girl-g... \n", + "14 Beyoncé's first solo album was titled Dangero... \n", + "15 After her second solo album, Beyoncé ventured... \n", + "16 Beyoncé married rapper Jay Z in 2008. \n", + "17 Beyoncé won six Grammy Awards in 2010, settin... \n", + "18 Beyoncé received her first Golden Globe nomin... \n", + "19 Beyoncé took a hiatus from music in 2010 and ... \n", + "20 Beyoncé rose to fame in the late 1990s as the... \n", + "21 Beyoncé competed in various singing and danci... \n", + "22 Beyoncé left Destiny's Child in 2003 and rele... \n", + "23 Beyoncé grew up in Houston, Texas. 
\n", + "24 Beyoncé rose to fame in the late 1990s. \n", + "25 Beyoncé was the lead singer of the R&B girl-g... \n", + "26 Beyoncé's debut album, Dangerously in Love (2... \n", + "27 Mathew Knowles managed the Destiny's Child gr... \n", + "28 Beyoncé rose to fame in the late 1990s as the... \n", + "29 Beyoncé was the lead singer of the R&B girl g... \n", + "30 Beyoncé's first album as a solo artist was Da... \n", + "31 Beyoncé released her debut album, Dangerously... \n", + "32 Beyoncé won five Grammy Awards for her first ... \n", + "33 Beyoncé was the lead singer of the R&B girl-g... \n", + "34 Beyoncé's first solo album was titled Dangero... \n", + "35 After her second solo album, Beyoncé ventured... \n", + "36 Beyoncé married rapper Jay Z. \n", + "37 Beyoncé won six Grammy Awards in 2010, settin... \n", + "38 Beyoncé received her first Golden Globe nomin... \n", + "39 Beyoncé took a hiatus from music in 2010 and ... \n", + "\n", + " actual_result pass \n", + "0 Beyoncé rose to fame in the late 1990s as lea... True \n", + "1 Beyoncé competed in various singing and danci... True \n", + "2 Beyoncé left Destiny's Child in 2003 and rele... True \n", + "3 Beyoncé grew up in Houston, Texas. True \n", + "4 Beyoncé became famous in the late 1990s. True \n", + "5 Beyoncé was the lead singer of the R&B girl-g... True \n", + "6 Beyoncé's debut album, Dangerously in Love (2... True \n", + "7 Mathew Knowles managed the Destiny's Child gr... True \n", + "8 Beyoncé rose to fame in the late 1990s as the... True \n", + "9 Beyoncé was the lead singer of Destiny's Child. True \n", + "10 Beyoncé's first album as a solo artist was Da... True \n", + "11 Beyoncé released her debut album, Dangerously... True \n", + "12 Beyoncé won five Grammy Awards for her first ... True \n", + "13 Beyoncé was the lead singer of the R&B girl-g... True \n", + "14 Beyoncé's first solo album was titled Dangero... True \n", + "15 After her second solo album, Beyoncé ventured... 
True \n", + "16 Beyoncé married rapper Jay Z. True \n", + "17 Beyoncé won six Grammy Awards in 2010. True \n", + "18 Beyoncé received her first Golden Globe nomin... True \n", + "19 Beyoncé took a hiatus from music in 2010 and ... True \n", + "20 Beyoncé rose to fame in the late 1990s as the... True \n", + "21 Beyoncé competed in various singing and danci... True \n", + "22 Beyoncé left Destiny's Child in 2003 and rele... True \n", + "23 Beyoncé grew up in Houston, Texas. True \n", + "24 Beyoncé became famous in the late 1990s. True \n", + "25 Beyoncé was the lead singer of the R&B girl-g... True \n", + "26 Beyoncé's debut album, Dangerously in Love (2... True \n", + "27 Mathew Knowles managed the Destiny's Child gr... True \n", + "28 Beyoncé rose to fame in the late 1990s as lea... True \n", + "29 Beyoncé was the lead singer of the R&B girl-g... True \n", + "30 Beyoncé's first album as a solo artist was \"D... True \n", + "31 Beyoncé released her debut album, Dangerously... True \n", + "32 Beyoncé won five Grammy Awards for her first ... True \n", + "33 Beyoncé was the lead singer of the R&B girl-g... True \n", + "34 Beyoncé's first solo album was titled \"Danger... True \n", + "35 After her second solo album, Beyoncé ventured... True \n", + "36 Beyoncé married rapper Jay Z. True \n", + "37 Beyoncé won six Grammy Awards in 2010, settin... True \n", + "38 Beyoncé received her first Golden Globe nomin... True \n", + "39 Beyoncé took a hiatus from music in 2010 and ... 
True " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generated_results()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:25:30.649475Z", + "iopub.status.busy": "2023-08-18T12:25:30.649025Z", + "iopub.status.idle": "2023-08-18T12:25:30.652999Z", + "shell.execute_reply": "2023-08-18T12:25:30.652448Z", + "shell.execute_reply.started": "2023-08-18T12:25:30.649456Z" + }, + "tags": [] + }, + "source": [ + "This method returns the generated results in the form of a pandas dataframe, which provides a convenient and easy-to-use format for working with the test results. You can use this method to quickly identify the test cases that failed and to determine where fixes are needed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Results\n", + "\n", + "We can call `.report()` which summarizes the results giving information about pass and fail counts and overall test pass/fail flag." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T12:01:55.901940Z", + "iopub.status.busy": "2023-08-18T12:01:55.901497Z", + "iopub.status.idle": "2023-08-18T12:02:08.480722Z", + "shell.execute_reply": "2023-08-18T12:02:08.480216Z", + "shell.execute_reply.started": "2023-08-18T12:01:55.901922Z" + }, + "id": "d95pZfPDP94s" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typefail_countpass_countpass_rateminimum_pass_ratepass
0robustnessadd_typo020100%70%True
1robustnesslowercase020100%70%True
\n", + "
" + ], + "text/plain": [ + " category test_type fail_count pass_count pass_rate minimum_pass_rate \\\n", + "0 robustness add_typo 0 20 100% 70% \n", + "1 robustness lowercase 0 20 100% 70% \n", + "\n", + " pass \n", + "0 True \n", + "1 True " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.report()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summarization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### OpenAI Model Testing For Summarization\n", + "\n", + "In this section, we dive into testing of OpenAI models in summarization task.\n", + "\n", + "LangTest supports robustness tests for LLM testing for now." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:18:40.313676Z", + "iopub.status.busy": "2023-08-18T11:18:40.313116Z", + "iopub.status.idle": "2023-08-18T11:18:40.476364Z", + "shell.execute_reply": "2023-08-18T11:18:40.475877Z", + "shell.execute_reply.started": "2023-08-18T11:18:40.313655Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import openai\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data : [NEWS SUMMARY](https://www.kaggle.com/sunnysai12345/news-summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setup and Configure Harness" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:04.244133Z", + "iopub.status.busy": "2023-08-18T11:19:04.243555Z", + "iopub.status.idle": "2023-08-18T11:19:11.224679Z", + "shell.execute_reply": "2023-08-18T11:19:11.224156Z", + "shell.execute_reply.started": "2023-08-18T11:19:04.244111Z" + }, + "id": "lDf-ZHMEPueE", + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Test Configuration : \n", + " {\n", + " \"model_parameters\": {\n", + " \"temperature\": 0.2,\n", + " \"max_tokens\": 64\n", + " },\n", + " \"tests\": {\n", + " \"defaults\": {\n", + " \"min_pass_rate\": 1.0\n", + " },\n", + " \"robustness\": {\n", + " \"add_typo\": {\n", + " \"min_pass_rate\": 0.7\n", + " },\n", + " \"lowercase\": {\n", + " \"min_pass_rate\": 0.7\n", + " }\n", + " }\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "harness = Harness(task=\"summarization\",\n", + " model={\"model\":\"text-davinci-003\",\"hub\":\"openai\"},\n", + " data={\"data_source\":\"news_summary_more.csv\",\n", + " \"feature_column\":\"headlines\",\n", + " \"target_column\":'text',\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:14.366236Z", + "iopub.status.busy": "2023-08-18T11:19:14.365793Z", + "iopub.status.idle": "2023-08-18T11:19:14.371585Z", + "shell.execute_reply": "2023-08-18T11:19:14.371156Z", + "shell.execute_reply.started": "2023-08-18T11:19:14.366216Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tests': {'defaults': {'min_pass_rate': 0.5},\n", + " 'robustness': {'add_typo': {'min_pass_rate': 0.7},\n", + " 'lowercase': {'min_pass_rate': 0.7}}}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.configure({\n", + " \"tests\":{\n", + " \"defaults\":{\"min_pass_rate\":0.5},\n", + " \"robustness\":{\n", + " \"add_typo\":{\"min_pass_rate\":0.7},\n", + " \"lowercase\":{\"min_pass_rate\":0.7},\n", + " }\n", + " }\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:15.657608Z", + "iopub.status.busy": "2023-08-18T11:19:15.657153Z", + "iopub.status.idle": "2023-08-18T11:19:15.692181Z", + "shell.execute_reply": "2023-08-18T11:19:15.691714Z", + 
"shell.execute_reply.started": "2023-08-18T11:19:15.657587Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "harness.data=harness.data[:20]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generating the test cases." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:16.944114Z", + "iopub.status.busy": "2023-08-18T11:19:16.943749Z", + "iopub.status.idle": "2023-08-18T11:19:16.951989Z", + "shell.execute_reply": "2023-08-18T11:19:16.951564Z", + "shell.execute_reply.started": "2023-08-18T11:19:16.944097Z" + }, + "id": "l4Ft5yDOP-gI", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generate()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "harness.generate() method automatically generates the test cases (based on the provided configuration)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:18.309407Z", + "iopub.status.busy": "2023-08-18T11:19:18.308996Z", + "iopub.status.idle": "2023-08-18T11:19:18.319087Z", + "shell.execute_reply": "2023-08-18T11:19:18.318677Z", + "shell.execute_reply.started": "2023-08-18T11:19:18.309388Z" + }, + "id": "JlkuykzDP-5J", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginaltest_case
0robustnessadd_typoupGrad learner switches to career in ML & Al w...upGrad learner switches to career in MO & Al w...
1robustnessadd_typoDelhi techie wins free food from Swiggy for on...Delhi techie wins free food from Swiggy for on...
2robustnessadd_typoNew Zealand end Rohit Sharma-led India's 12-ma...New Zealand ewd Rohit Sharma-led India's 12-ma...
3robustnessadd_typoAegon life iTerm insurance plan helps customer...Aegon life iTerm insurancr plan helps customer...
4robustnessadd_typoHave known Hirani for yrs, what if MeToo claim...Have known Hirani for yrs, what if MeToo claim...
5robustnessadd_typoRahat Fateh Ali Khan denies getting notice for...Rahat Fateh Ali Khsn denies getting notice for...
6robustnessadd_typoIndia get all out for 92, their lowest ODI tot...India get all out for 92, their oowest ODI tot...
7robustnessadd_typoGovt directs Alok Verma to join work 1 day bef...Gpvt directs Alok Verma to join work 1 day bef...
8robustnessadd_typoCalled PM Modi 'sir' 10 times to satisfy his e...Callsd PM Modi 'sir' 10 times to satisfy his e...
9robustnessadd_typoCong wins Ramgarh bypoll in Rajasthan, takes t...Cong wins Ramgarh byplol in Rajasthan, takes t...
10robustnessadd_typoUP cousins fed human excreta for friendship wi...UP cousins fed human excreta for friendship wi...
11robustnessadd_typo81-yr-old woman conducts physical training in ...81-yr-old woman conducts physical training in ...
12robustnessadd_typoRam, Krishna didn't smoke, why should we: Ramd...Ram, Krishna didn't smoke, why should we: Ramd...
13robustnessadd_typoPharma exec gave doctor a lap dance to sell me...Pharma exec gave doctor a lap dance to sell me...
14robustnessadd_typoI only cried at my 'bidaai' as I felt peer pr...I only cried at my 'bodaai' as I felt peer pr...
15robustnessadd_typoLouis Vuitton owner to stockpile 4 months of w...Louis Vuitton owner to stockpile 4 months of w...
16robustnessadd_typoKaran Johar, Tabu turn showstoppers on opening...Karan Johar, Tabu turn showstoppers on opening...
17robustnessadd_typoThose on bail will go to jail: PM Modi takes j...hTose on bail will go to jail: PM Modi takes j...
18robustnessadd_typoHow long can I tolerate Congress leaders' pots...How lozg can I tolerate Congress leaders' pots...
19robustnessadd_typoOdisha CM Patnaik controls mining mafia: Union...Odisha CM Patnaik controls mining mafia :Union...
20robustnesslowercaseupGrad learner switches to career in ML & Al w...upgrad learner switches to career in ml & al w...
21robustnesslowercaseDelhi techie wins free food from Swiggy for on...delhi techie wins free food from swiggy for on...
22robustnesslowercaseNew Zealand end Rohit Sharma-led India's 12-ma...new zealand end rohit sharma-led india's 12-ma...
23robustnesslowercaseAegon life iTerm insurance plan helps customer...aegon life iterm insurance plan helps customer...
24robustnesslowercaseHave known Hirani for yrs, what if MeToo claim...have known hirani for yrs, what if metoo claim...
25robustnesslowercaseRahat Fateh Ali Khan denies getting notice for...rahat fateh ali khan denies getting notice for...
26robustnesslowercaseIndia get all out for 92, their lowest ODI tot...india get all out for 92, their lowest odi tot...
27robustnesslowercaseGovt directs Alok Verma to join work 1 day bef...govt directs alok verma to join work 1 day bef...
28robustnesslowercaseCalled PM Modi 'sir' 10 times to satisfy his e...called pm modi 'sir' 10 times to satisfy his e...
29robustnesslowercaseCong wins Ramgarh bypoll in Rajasthan, takes t...cong wins ramgarh bypoll in rajasthan, takes t...
30robustnesslowercaseUP cousins fed human excreta for friendship wi...up cousins fed human excreta for friendship wi...
31robustnesslowercase81-yr-old woman conducts physical training in ...81-yr-old woman conducts physical training in ...
32robustnesslowercaseRam, Krishna didn't smoke, why should we: Ramd...ram, krishna didn't smoke, why should we: ramd...
33robustnesslowercasePharma exec gave doctor a lap dance to sell me...pharma exec gave doctor a lap dance to sell me...
34robustnesslowercaseI only cried at my 'bidaai' as I felt peer pr...i only cried at my 'bidaai' as i felt peer pre...
35robustnesslowercaseLouis Vuitton owner to stockpile 4 months of w...louis vuitton owner to stockpile 4 months of w...
36robustnesslowercaseKaran Johar, Tabu turn showstoppers on opening...karan johar, tabu turn showstoppers on opening...
37robustnesslowercaseThose on bail will go to jail: PM Modi takes j...those on bail will go to jail: pm modi takes j...
38robustnesslowercaseHow long can I tolerate Congress leaders' pots...how long can i tolerate congress leaders' pots...
39robustnesslowercaseOdisha CM Patnaik controls mining mafia: Union...odisha cm patnaik controls mining mafia: union...
\n", + "
" + ], + "text/plain": [ + " category test_type original \\\n", + "0 robustness add_typo upGrad learner switches to career in ML & Al w... \n", + "1 robustness add_typo Delhi techie wins free food from Swiggy for on... \n", + "2 robustness add_typo New Zealand end Rohit Sharma-led India's 12-ma... \n", + "3 robustness add_typo Aegon life iTerm insurance plan helps customer... \n", + "4 robustness add_typo Have known Hirani for yrs, what if MeToo claim... \n", + "5 robustness add_typo Rahat Fateh Ali Khan denies getting notice for... \n", + "6 robustness add_typo India get all out for 92, their lowest ODI tot... \n", + "7 robustness add_typo Govt directs Alok Verma to join work 1 day bef... \n", + "8 robustness add_typo Called PM Modi 'sir' 10 times to satisfy his e... \n", + "9 robustness add_typo Cong wins Ramgarh bypoll in Rajasthan, takes t... \n", + "10 robustness add_typo UP cousins fed human excreta for friendship wi... \n", + "11 robustness add_typo 81-yr-old woman conducts physical training in ... \n", + "12 robustness add_typo Ram, Krishna didn't smoke, why should we: Ramd... \n", + "13 robustness add_typo Pharma exec gave doctor a lap dance to sell me... \n", + "14 robustness add_typo I only cried at my 'bidaai' as I felt peer pr... \n", + "15 robustness add_typo Louis Vuitton owner to stockpile 4 months of w... \n", + "16 robustness add_typo Karan Johar, Tabu turn showstoppers on opening... \n", + "17 robustness add_typo Those on bail will go to jail: PM Modi takes j... \n", + "18 robustness add_typo How long can I tolerate Congress leaders' pots... \n", + "19 robustness add_typo Odisha CM Patnaik controls mining mafia: Union... \n", + "20 robustness lowercase upGrad learner switches to career in ML & Al w... \n", + "21 robustness lowercase Delhi techie wins free food from Swiggy for on... \n", + "22 robustness lowercase New Zealand end Rohit Sharma-led India's 12-ma... \n", + "23 robustness lowercase Aegon life iTerm insurance plan helps customer... 
\n", + "24 robustness lowercase Have known Hirani for yrs, what if MeToo claim... \n", + "25 robustness lowercase Rahat Fateh Ali Khan denies getting notice for... \n", + "26 robustness lowercase India get all out for 92, their lowest ODI tot... \n", + "27 robustness lowercase Govt directs Alok Verma to join work 1 day bef... \n", + "28 robustness lowercase Called PM Modi 'sir' 10 times to satisfy his e... \n", + "29 robustness lowercase Cong wins Ramgarh bypoll in Rajasthan, takes t... \n", + "30 robustness lowercase UP cousins fed human excreta for friendship wi... \n", + "31 robustness lowercase 81-yr-old woman conducts physical training in ... \n", + "32 robustness lowercase Ram, Krishna didn't smoke, why should we: Ramd... \n", + "33 robustness lowercase Pharma exec gave doctor a lap dance to sell me... \n", + "34 robustness lowercase I only cried at my 'bidaai' as I felt peer pr... \n", + "35 robustness lowercase Louis Vuitton owner to stockpile 4 months of w... \n", + "36 robustness lowercase Karan Johar, Tabu turn showstoppers on opening... \n", + "37 robustness lowercase Those on bail will go to jail: PM Modi takes j... \n", + "38 robustness lowercase How long can I tolerate Congress leaders' pots... \n", + "39 robustness lowercase Odisha CM Patnaik controls mining mafia: Union... \n", + "\n", + " test_case \n", + "0 upGrad learner switches to career in MO & Al w... \n", + "1 Delhi techie wins free food from Swiggy for on... \n", + "2 New Zealand ewd Rohit Sharma-led India's 12-ma... \n", + "3 Aegon life iTerm insurancr plan helps customer... \n", + "4 Have known Hirani for yrs, what if MeToo claim... \n", + "5 Rahat Fateh Ali Khsn denies getting notice for... \n", + "6 India get all out for 92, their oowest ODI tot... \n", + "7 Gpvt directs Alok Verma to join work 1 day bef... \n", + "8 Callsd PM Modi 'sir' 10 times to satisfy his e... \n", + "9 Cong wins Ramgarh byplol in Rajasthan, takes t... \n", + "10 UP cousins fed human excreta for friendship wi... 
\n", + "11 81-yr-old woman conducts physical training in ... \n", + "12 Ram, Krishna didn't smoke, why should we: Ramd... \n", + "13 Pharma exec gave doctor a lap dance to sell me... \n", + "14 I only cried at my 'bodaai' as I felt peer pr... \n", + "15 Louis Vuitton owner to stockpile 4 months of w... \n", + "16 Karan Johar, Tabu turn showstoppers on opening... \n", + "17 hTose on bail will go to jail: PM Modi takes j... \n", + "18 How lozg can I tolerate Congress leaders' pots... \n", + "19 Odisha CM Patnaik controls mining mafia :Union... \n", + "20 upgrad learner switches to career in ml & al w... \n", + "21 delhi techie wins free food from swiggy for on... \n", + "22 new zealand end rohit sharma-led india's 12-ma... \n", + "23 aegon life iterm insurance plan helps customer... \n", + "24 have known hirani for yrs, what if metoo claim... \n", + "25 rahat fateh ali khan denies getting notice for... \n", + "26 india get all out for 92, their lowest odi tot... \n", + "27 govt directs alok verma to join work 1 day bef... \n", + "28 called pm modi 'sir' 10 times to satisfy his e... \n", + "29 cong wins ramgarh bypoll in rajasthan, takes t... \n", + "30 up cousins fed human excreta for friendship wi... \n", + "31 81-yr-old woman conducts physical training in ... \n", + "32 ram, krishna didn't smoke, why should we: ramd... \n", + "33 pharma exec gave doctor a lap dance to sell me... \n", + "34 i only cried at my 'bidaai' as i felt peer pre... \n", + "35 louis vuitton owner to stockpile 4 months of w... \n", + "36 karan johar, tabu turn showstoppers on opening... \n", + "37 those on bail will go to jail: pm modi takes j... \n", + "38 how long can i tolerate congress leaders' pots... \n", + "39 odisha cm patnaik controls mining mafia: union... 
" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.testcases()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running the tests" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:19:20.030039Z", + "iopub.status.busy": "2023-08-18T11:19:20.029580Z", + "iopub.status.idle": "2023-08-18T11:21:04.768329Z", + "shell.execute_reply": "2023-08-18T11:21:04.767841Z", + "shell.execute_reply.started": "2023-08-18T11:19:20.030018Z" + }, + "id": "O9aitqGIP_LF", + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running testcases... : 100%|██████████| 40/40 [01:44<00:00, 2.62s/it]\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Called after harness.generate() and is to used to run all the tests. Returns a pass/fail flag for each test." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generated Results" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:21:30.246700Z", + "iopub.status.busy": "2023-08-18T11:21:30.246520Z", + "iopub.status.idle": "2023-08-18T11:21:38.235462Z", + "shell.execute_reply": "2023-08-18T11:21:38.234872Z", + "shell.execute_reply.started": "2023-08-18T11:21:30.246684Z" + }, + "id": "Ps5b4SVaP_cM", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typeoriginaltest_caseexpected_resultactual_resulteval_scorepass
0robustnessadd_typoupGrad learner switches to career in ML & Al w...upGrad learner switches to career in MO & Al w...\\n\\nA learner from upGrad has successfully tra...\\n\\nA learner from upGrad has successfully tra...0.727273True
1robustnessadd_typoDelhi techie wins free food from Swiggy for on...Delhi techie wins free food from Swiggy for on...\\n\\nA Delhi-based techie recently won a year's...\\n\\nA Delhi-based techie recently won a year's...0.291667False
2robustnessadd_typoNew Zealand end Rohit Sharma-led India's 12-ma...New Zealand ewd Rohit Sharma-led India's 12-ma...\\n\\nNew Zealand ended India's 12-match winning...\\n\\nIndia, led by Rohit Sharma, recently ended...0.152174False
3robustnessadd_typoAegon life iTerm insurance plan helps customer...Aegon life iTerm insurancr plan helps customer...\\n\\nAegon Life's iTerm Insurance Plan is desig...\\n\\nAegon Life iTerm Insurance Plan is a tax-s...0.400000False
4robustnessadd_typoHave known Hirani for yrs, what if MeToo claim...Have known Hirani for yrs, what if MeToo claim...\\n\\nSonam Kapoor has known filmmaker Rajkumar ...\\n\\nSonam Kapoor has known filmmaker Rajkumar ...0.406250False
5robustnessadd_typoRahat Fateh Ali Khan denies getting notice for...Rahat Fateh Ali Khsn denies getting notice for...\\n\\nRahat Fateh Ali Khan, a renowned Pakistani...\\n\\nRahat Fateh Ali Khan, a renowned Pakistani...0.707317True
6robustnessadd_typoIndia get all out for 92, their lowest ODI tot...India get all out for 92, their oowest ODI tot...\\nIndia suffered their lowest ODI total in New...\\nIndia suffered a major batting collapse in t...0.523810True
7robustnessadd_typoGovt directs Alok Verma to join work 1 day bef...Gpvt directs Alok Verma to join work 1 day bef...\\n\\nThe Government has directed Alok Verma, th...\\n\\nThe Government of India has directed Alok ...0.422222False
8robustnessadd_typoCalled PM Modi 'sir' 10 times to satisfy his e...Callsd PM Modi 'sir' 10 times to satisfy his e...\\n\\nAndhra Pradesh Chief Minister YS Jagan Moh...\\n\\nAndhra Pradesh Chief Minister YS Jagan Moh...0.521739True
9robustnessadd_typoCong wins Ramgarh bypoll in Rajasthan, takes t...Cong wins Ramgarh byplol in Rajasthan, takes t...\\nThe Congress Party has won the Ramgarh bypol...\\nThe Congress party has won the Ramgarh by-el...0.622222True
10robustnessadd_typoUP cousins fed human excreta for friendship wi...UP cousins fed human excreta for friendship wi...\\nTwo cousins from Uttar Pradesh fed human exc...\\nTwo cousins from Uttar Pradesh fed human exc...0.571429True
11robustnessadd_typo81-yr-old woman conducts physical training in ...81-yr-old woman conducts physical training in ...\\n\\nAn 81-year-old woman in Jharkhand, India, ...\\nAn 81-year-old woman in Jharkhand is inspiri...0.415842False
12robustnessadd_typoRam, Krishna didn't smoke, why should we: Ramd...Ram, Krishna didn't smoke, why should we: Ramd...\\nAt the Kumbh Mela, Baba Ramdev addressed a g...\\n\\nRam and Krishna, two revered figures in Hi...0.082192False
13robustnessadd_typoPharma exec gave doctor a lap dance to sell me...Pharma exec gave doctor a lap dance to sell me...\\n\\nA witness has reported that a pharmaceutic...\\n\\nA witness has reported that a pharmaceutic...0.417582False
14robustnessadd_typoI only cried at my 'bidaai' as I felt peer pr...I only cried at my 'bodaai' as I felt peer pr...\\n\\nIsha Ambani, daughter of India's richest m...\\n\\nIsha Ambani, daughter of India's wealthies...0.340909False
15robustnessadd_typoLouis Vuitton owner to stockpile 4 months of w...Louis Vuitton owner to stockpile 4 months of w...\\n\\nLVMH, the luxury goods conglomerate owned ...\\n\\nLVMH, the parent company of luxury fashion...0.476190False
16robustnessadd_typoKaran Johar, Tabu turn showstoppers on opening...Karan Johar, Tabu turn showstoppers on opening...\\n\\nOn the opening night of the Lakme Fashion ...\\n\\nOn the opening night of the Lakme Fashion ...0.292683False
17robustnessadd_typoThose on bail will go to jail: PM Modi takes j...hTose on bail will go to jail: PM Modi takes j...\\nPrime Minister Narendra Modi took a jibe at ...\\nPrime Minister Narendra Modi took a jibe at ...0.615385True
18robustnessadd_typoHow long can I tolerate Congress leaders' pots...How lozg can I tolerate Congress leaders' pots...\\n\\nKarnataka Chief Minister Siddaramaiah has ...\\n\\nKarnataka Chief Minister Siddaramaiah has ...0.528736True
19robustnessadd_typoOdisha CM Patnaik controls mining mafia: Union...Odisha CM Patnaik controls mining mafia :Union...\\n\\nUnion Minister has praised Odisha Chief Mi...\\n\\nUnion Minister has praised Odisha Chief Mi...0.736842True
20robustnesslowercaseupGrad learner switches to career in ML & Al w...upgrad learner switches to career in ml & al w...\\n\\nA learner from upGrad has successfully tra...\\n\\nA learner from UpGrad has successfully swi...0.571429True
21robustnesslowercaseDelhi techie wins free food from Swiggy for on...delhi techie wins free food from swiggy for on...\\n\\nA Delhi-based techie recently won a year's...\\n\\nA Delhi-based techie has won free food fro...0.168421False
22robustnesslowercaseNew Zealand end Rohit Sharma-led India's 12-ma...new zealand end rohit sharma-led india's 12-ma...\\n\\nNew Zealand ended India's 12-match winning...\\n\\nIndia's 12-match winning streak, led by Ro...0.305556False
23robustnesslowercaseAegon life iTerm insurance plan helps customer...aegon life iterm insurance plan helps customer...\\n\\nAegon Life's iTerm Insurance Plan is a gre...\\n\\nAegon Life iTerm Insurance Plan is a great...0.628571True
24robustnesslowercaseHave known Hirani for yrs, what if MeToo claim...have known hirani for yrs, what if metoo claim...\\n\\nSonam Kapoor has known filmmaker Rajkumar ...\\n\\nSonam Kapoor has known filmmaker Rajkumar ...0.555556True
25robustnesslowercaseRahat Fateh Ali Khan denies getting notice for...rahat fateh ali khan denies getting notice for...\\n\\nRahat Fateh Ali Khan, a renowned Pakistani...\\n\\nRahat Fateh Ali Khan, a renowned Pakistani...0.419753False
26robustnesslowercaseIndia get all out for 92, their lowest ODI tot...india get all out for 92, their lowest odi tot...\\nIndia suffered a humiliating defeat in their...\\nIndia suffered a major batting collapse in t...0.705882True
27robustnesslowercaseGovt directs Alok Verma to join work 1 day bef...govt directs alok verma to join work 1 day bef...\\n\\nThe Government has directed Alok Verma, th...\\n\\nThe Government has directed Alok Verma to ...0.610526True
28robustnesslowercaseCalled PM Modi 'sir' 10 times to satisfy his e...called pm modi 'sir' 10 times to satisfy his e...\\n\\nAndhra Pradesh Chief Minister YS Jagan Moh...\\n\\nAndhra Pradesh Chief Minister called Prime...0.296296False
29robustnesslowercaseCong wins Ramgarh bypoll in Rajasthan, takes t...cong wins ramgarh bypoll in rajasthan, takes t...\\nThe Congress party has won the Ramgarh bypol...\\n\\nThe Congress party has won the Ramgarh by-...0.784810True
30robustnesslowercaseUP cousins fed human excreta for friendship wi...up cousins fed human excreta for friendship wi...\\nTwo cousins from Uttar Pradesh fed human exc...\\n\\nTwo cousins fed human excreta to boys in o...0.222222False
31robustnesslowercase81-yr-old woman conducts physical training in ...81-yr-old woman conducts physical training in ...\\n\\nAn 81-year-old woman in Jharkhand is inspi...\\n\\nAn 81-year-old woman in Jharkhand, India, ...0.408163False
32robustnesslowercaseRam, Krishna didn't smoke, why should we: Ramd...ram, krishna didn't smoke, why should we: ramd...\\nAt the Kumbh Mela, Baba Ramdev addressed a g...\\nRam and Krishna, two sadhus at the Kumbh Mel...0.171429False
33robustnesslowercasePharma exec gave doctor a lap dance to sell me...pharma exec gave doctor a lap dance to sell me...\\n\\nA witness has reported that a pharmaceutic...\\n\\nA witness has reported that a pharmaceutic...0.525000True
34robustnesslowercaseI only cried at my 'bidaai' as I felt peer pr...i only cried at my 'bidaai' as i felt peer pre...\\n\\nIsha Ambani, daughter of India's richest m...\\n\\nIsha Ambani, daughter of India's richest m...0.329670False
35robustnesslowercaseLouis Vuitton owner to stockpile 4 months of w...louis vuitton owner to stockpile 4 months of w...\\n\\nLVMH, the parent company of luxury fashion...\\n\\nThe owner of Louis Vuitton, Bernard Arnaul...0.411215False
36robustnesslowercaseKaran Johar, Tabu turn showstoppers on opening...karan johar, tabu turn showstoppers on opening...\\n\\nOn the opening night of the Lakme Fashion ...\\n\\nOn the opening night of the Lakme Fashion ...0.506667True
37robustnesslowercaseThose on bail will go to jail: PM Modi takes j...those on bail will go to jail: pm modi takes j...\\nPrime Minister Narendra Modi took a jibe at ...\\nPrime Minister Narendra Modi took a jibe at ...0.451613False
38robustnesslowercaseHow long can I tolerate Congress leaders' pots...how long can i tolerate congress leaders' pots...\\n\\nKarnataka Chief Minister Siddaramaiah has ...\\n\\nKarnataka Chief Minister has expressed his...0.426230False
39robustnesslowercaseOdisha CM Patnaik controls mining mafia: Union...odisha cm patnaik controls mining mafia: union...\\n\\nUnion Minister has praised Odisha Chief Mi...\\n\\nUnion Minister has praised Odisha Chief Mi...0.795181True
\n", + "
" + ], + "text/plain": [ + " category test_type original \\\n", + "0 robustness add_typo upGrad learner switches to career in ML & Al w... \n", + "1 robustness add_typo Delhi techie wins free food from Swiggy for on... \n", + "2 robustness add_typo New Zealand end Rohit Sharma-led India's 12-ma... \n", + "3 robustness add_typo Aegon life iTerm insurance plan helps customer... \n", + "4 robustness add_typo Have known Hirani for yrs, what if MeToo claim... \n", + "5 robustness add_typo Rahat Fateh Ali Khan denies getting notice for... \n", + "6 robustness add_typo India get all out for 92, their lowest ODI tot... \n", + "7 robustness add_typo Govt directs Alok Verma to join work 1 day bef... \n", + "8 robustness add_typo Called PM Modi 'sir' 10 times to satisfy his e... \n", + "9 robustness add_typo Cong wins Ramgarh bypoll in Rajasthan, takes t... \n", + "10 robustness add_typo UP cousins fed human excreta for friendship wi... \n", + "11 robustness add_typo 81-yr-old woman conducts physical training in ... \n", + "12 robustness add_typo Ram, Krishna didn't smoke, why should we: Ramd... \n", + "13 robustness add_typo Pharma exec gave doctor a lap dance to sell me... \n", + "14 robustness add_typo I only cried at my 'bidaai' as I felt peer pr... \n", + "15 robustness add_typo Louis Vuitton owner to stockpile 4 months of w... \n", + "16 robustness add_typo Karan Johar, Tabu turn showstoppers on opening... \n", + "17 robustness add_typo Those on bail will go to jail: PM Modi takes j... \n", + "18 robustness add_typo How long can I tolerate Congress leaders' pots... \n", + "19 robustness add_typo Odisha CM Patnaik controls mining mafia: Union... \n", + "20 robustness lowercase upGrad learner switches to career in ML & Al w... \n", + "21 robustness lowercase Delhi techie wins free food from Swiggy for on... \n", + "22 robustness lowercase New Zealand end Rohit Sharma-led India's 12-ma... \n", + "23 robustness lowercase Aegon life iTerm insurance plan helps customer... 
\n", + "24 robustness lowercase Have known Hirani for yrs, what if MeToo claim... \n", + "25 robustness lowercase Rahat Fateh Ali Khan denies getting notice for... \n", + "26 robustness lowercase India get all out for 92, their lowest ODI tot... \n", + "27 robustness lowercase Govt directs Alok Verma to join work 1 day bef... \n", + "28 robustness lowercase Called PM Modi 'sir' 10 times to satisfy his e... \n", + "29 robustness lowercase Cong wins Ramgarh bypoll in Rajasthan, takes t... \n", + "30 robustness lowercase UP cousins fed human excreta for friendship wi... \n", + "31 robustness lowercase 81-yr-old woman conducts physical training in ... \n", + "32 robustness lowercase Ram, Krishna didn't smoke, why should we: Ramd... \n", + "33 robustness lowercase Pharma exec gave doctor a lap dance to sell me... \n", + "34 robustness lowercase I only cried at my 'bidaai' as I felt peer pr... \n", + "35 robustness lowercase Louis Vuitton owner to stockpile 4 months of w... \n", + "36 robustness lowercase Karan Johar, Tabu turn showstoppers on opening... \n", + "37 robustness lowercase Those on bail will go to jail: PM Modi takes j... \n", + "38 robustness lowercase How long can I tolerate Congress leaders' pots... \n", + "39 robustness lowercase Odisha CM Patnaik controls mining mafia: Union... \n", + "\n", + " test_case \\\n", + "0 upGrad learner switches to career in MO & Al w... \n", + "1 Delhi techie wins free food from Swiggy for on... \n", + "2 New Zealand ewd Rohit Sharma-led India's 12-ma... \n", + "3 Aegon life iTerm insurancr plan helps customer... \n", + "4 Have known Hirani for yrs, what if MeToo claim... \n", + "5 Rahat Fateh Ali Khsn denies getting notice for... \n", + "6 India get all out for 92, their oowest ODI tot... \n", + "7 Gpvt directs Alok Verma to join work 1 day bef... \n", + "8 Callsd PM Modi 'sir' 10 times to satisfy his e... \n", + "9 Cong wins Ramgarh byplol in Rajasthan, takes t... 
\n", + "10 UP cousins fed human excreta for friendship wi... \n", + "11 81-yr-old woman conducts physical training in ... \n", + "12 Ram, Krishna didn't smoke, why should we: Ramd... \n", + "13 Pharma exec gave doctor a lap dance to sell me... \n", + "14 I only cried at my 'bodaai' as I felt peer pr... \n", + "15 Louis Vuitton owner to stockpile 4 months of w... \n", + "16 Karan Johar, Tabu turn showstoppers on opening... \n", + "17 hTose on bail will go to jail: PM Modi takes j... \n", + "18 How lozg can I tolerate Congress leaders' pots... \n", + "19 Odisha CM Patnaik controls mining mafia :Union... \n", + "20 upgrad learner switches to career in ml & al w... \n", + "21 delhi techie wins free food from swiggy for on... \n", + "22 new zealand end rohit sharma-led india's 12-ma... \n", + "23 aegon life iterm insurance plan helps customer... \n", + "24 have known hirani for yrs, what if metoo claim... \n", + "25 rahat fateh ali khan denies getting notice for... \n", + "26 india get all out for 92, their lowest odi tot... \n", + "27 govt directs alok verma to join work 1 day bef... \n", + "28 called pm modi 'sir' 10 times to satisfy his e... \n", + "29 cong wins ramgarh bypoll in rajasthan, takes t... \n", + "30 up cousins fed human excreta for friendship wi... \n", + "31 81-yr-old woman conducts physical training in ... \n", + "32 ram, krishna didn't smoke, why should we: ramd... \n", + "33 pharma exec gave doctor a lap dance to sell me... \n", + "34 i only cried at my 'bidaai' as i felt peer pre... \n", + "35 louis vuitton owner to stockpile 4 months of w... \n", + "36 karan johar, tabu turn showstoppers on opening... \n", + "37 those on bail will go to jail: pm modi takes j... \n", + "38 how long can i tolerate congress leaders' pots... \n", + "39 odisha cm patnaik controls mining mafia: union... \n", + "\n", + " expected_result \\\n", + "0 \\n\\nA learner from upGrad has successfully tra... \n", + "1 \\n\\nA Delhi-based techie recently won a year's... 
\n", + "2 \\n\\nNew Zealand ended India's 12-match winning... \n", + "3 \\n\\nAegon Life's iTerm Insurance Plan is desig... \n", + "4 \\n\\nSonam Kapoor has known filmmaker Rajkumar ... \n", + "5 \\n\\nRahat Fateh Ali Khan, a renowned Pakistani... \n", + "6 \\nIndia suffered their lowest ODI total in New... \n", + "7 \\n\\nThe Government has directed Alok Verma, th... \n", + "8 \\n\\nAndhra Pradesh Chief Minister YS Jagan Moh... \n", + "9 \\nThe Congress Party has won the Ramgarh bypol... \n", + "10 \\nTwo cousins from Uttar Pradesh fed human exc... \n", + "11 \\n\\nAn 81-year-old woman in Jharkhand, India, ... \n", + "12 \\nAt the Kumbh Mela, Baba Ramdev addressed a g... \n", + "13 \\n\\nA witness has reported that a pharmaceutic... \n", + "14 \\n\\nIsha Ambani, daughter of India's richest m... \n", + "15 \\n\\nLVMH, the luxury goods conglomerate owned ... \n", + "16 \\n\\nOn the opening night of the Lakme Fashion ... \n", + "17 \\nPrime Minister Narendra Modi took a jibe at ... \n", + "18 \\n\\nKarnataka Chief Minister Siddaramaiah has ... \n", + "19 \\n\\nUnion Minister has praised Odisha Chief Mi... \n", + "20 \\n\\nA learner from upGrad has successfully tra... \n", + "21 \\n\\nA Delhi-based techie recently won a year's... \n", + "22 \\n\\nNew Zealand ended India's 12-match winning... \n", + "23 \\n\\nAegon Life's iTerm Insurance Plan is a gre... \n", + "24 \\n\\nSonam Kapoor has known filmmaker Rajkumar ... \n", + "25 \\n\\nRahat Fateh Ali Khan, a renowned Pakistani... \n", + "26 \\nIndia suffered a humiliating defeat in their... \n", + "27 \\n\\nThe Government has directed Alok Verma, th... \n", + "28 \\n\\nAndhra Pradesh Chief Minister YS Jagan Moh... \n", + "29 \\nThe Congress party has won the Ramgarh bypol... \n", + "30 \\nTwo cousins from Uttar Pradesh fed human exc... \n", + "31 \\n\\nAn 81-year-old woman in Jharkhand is inspi... \n", + "32 \\nAt the Kumbh Mela, Baba Ramdev addressed a g... \n", + "33 \\n\\nA witness has reported that a pharmaceutic... 
\n", + "34 \\n\\nIsha Ambani, daughter of India's richest m... \n", + "35 \\n\\nLVMH, the parent company of luxury fashion... \n", + "36 \\n\\nOn the opening night of the Lakme Fashion ... \n", + "37 \\nPrime Minister Narendra Modi took a jibe at ... \n", + "38 \\n\\nKarnataka Chief Minister Siddaramaiah has ... \n", + "39 \\n\\nUnion Minister has praised Odisha Chief Mi... \n", + "\n", + " actual_result eval_score pass \n", + "0 \\n\\nA learner from upGrad has successfully tra... 0.727273 True \n", + "1 \\n\\nA Delhi-based techie recently won a year's... 0.291667 False \n", + "2 \\n\\nIndia, led by Rohit Sharma, recently ended... 0.152174 False \n", + "3 \\n\\nAegon Life iTerm Insurance Plan is a tax-s... 0.400000 False \n", + "4 \\n\\nSonam Kapoor has known filmmaker Rajkumar ... 0.406250 False \n", + "5 \\n\\nRahat Fateh Ali Khan, a renowned Pakistani... 0.707317 True \n", + "6 \\nIndia suffered a major batting collapse in t... 0.523810 True \n", + "7 \\n\\nThe Government of India has directed Alok ... 0.422222 False \n", + "8 \\n\\nAndhra Pradesh Chief Minister YS Jagan Moh... 0.521739 True \n", + "9 \\nThe Congress party has won the Ramgarh by-el... 0.622222 True \n", + "10 \\nTwo cousins from Uttar Pradesh fed human exc... 0.571429 True \n", + "11 \\nAn 81-year-old woman in Jharkhand is inspiri... 0.415842 False \n", + "12 \\n\\nRam and Krishna, two revered figures in Hi... 0.082192 False \n", + "13 \\n\\nA witness has reported that a pharmaceutic... 0.417582 False \n", + "14 \\n\\nIsha Ambani, daughter of India's wealthies... 0.340909 False \n", + "15 \\n\\nLVMH, the parent company of luxury fashion... 0.476190 False \n", + "16 \\n\\nOn the opening night of the Lakme Fashion ... 0.292683 False \n", + "17 \\nPrime Minister Narendra Modi took a jibe at ... 0.615385 True \n", + "18 \\n\\nKarnataka Chief Minister Siddaramaiah has ... 0.528736 True \n", + "19 \\n\\nUnion Minister has praised Odisha Chief Mi... 
0.736842 True \n", + "20 \\n\\nA learner from UpGrad has successfully swi... 0.571429 True \n", + "21 \\n\\nA Delhi-based techie has won free food fro... 0.168421 False \n", + "22 \\n\\nIndia's 12-match winning streak, led by Ro... 0.305556 False \n", + "23 \\n\\nAegon Life iTerm Insurance Plan is a great... 0.628571 True \n", + "24 \\n\\nSonam Kapoor has known filmmaker Rajkumar ... 0.555556 True \n", + "25 \\n\\nRahat Fateh Ali Khan, a renowned Pakistani... 0.419753 False \n", + "26 \\nIndia suffered a major batting collapse in t... 0.705882 True \n", + "27 \\n\\nThe Government has directed Alok Verma to ... 0.610526 True \n", + "28 \\n\\nAndhra Pradesh Chief Minister called Prime... 0.296296 False \n", + "29 \\n\\nThe Congress party has won the Ramgarh by-... 0.784810 True \n", + "30 \\n\\nTwo cousins fed human excreta to boys in o... 0.222222 False \n", + "31 \\n\\nAn 81-year-old woman in Jharkhand, India, ... 0.408163 False \n", + "32 \\nRam and Krishna, two sadhus at the Kumbh Mel... 0.171429 False \n", + "33 \\n\\nA witness has reported that a pharmaceutic... 0.525000 True \n", + "34 \\n\\nIsha Ambani, daughter of India's richest m... 0.329670 False \n", + "35 \\n\\nThe owner of Louis Vuitton, Bernard Arnaul... 0.411215 False \n", + "36 \\n\\nOn the opening night of the Lakme Fashion ... 0.506667 True \n", + "37 \\nPrime Minister Narendra Modi took a jibe at ... 0.451613 False \n", + "38 \\n\\nKarnataka Chief Minister has expressed his... 0.426230 False \n", + "39 \\n\\nUnion Minister has praised Odisha Chief Mi... 0.795181 True " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generated_results()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method returns the generated results in the form of a pandas dataframe, which provides a convenient and easy-to-use format for working with the test results. 
You can use this method to quickly identify the test cases that failed and to determine where fixes are needed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Results\n", + "\n", + "We can call `.report()` which summarizes the results giving information about pass and fail counts and overall test pass/fail flag." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T11:21:38.236285Z", + "iopub.status.busy": "2023-08-18T11:21:38.236115Z", + "iopub.status.idle": "2023-08-18T11:21:45.959047Z", + "shell.execute_reply": "2023-08-18T11:21:45.958567Z", + "shell.execute_reply.started": "2023-08-18T11:21:38.236269Z" + }, + "id": "xjrFobwfP_zs", + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorytest_typefail_countpass_countpass_rateminimum_pass_ratepass
0robustnessadd_typo11945%70%False
1robustnesslowercase11945%70%False
\n", + "
" + ], + "text/plain": [ + " category test_type fail_count pass_count pass_rate minimum_pass_rate \\\n", + "0 robustness add_typo 11 9 45% 70% \n", + "1 robustness lowercase 11 9 45% 70% \n", + "\n", + " pass \n", + "0 False \n", + "1 False " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.report()" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyMJrE2NnWZKhTDI3/g+GDIS", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "034cffa7b1eb414f968a52554e93d732": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_387f28a7ebad434298f6d3389be2dccb", + "IPY_MODEL_7aef86a276f945a7b59633462dcda0dc", + "IPY_MODEL_534f1061303e45c2af004258b347f563" + ], + "layout": "IPY_MODEL_9a8d04a738f54a83842591462144de48" + } + }, + "048af93849664d89afd4ec54677e2480": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0967da2f32004a969cc121045548a48d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, 
+ "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0a58308a1a604bd0bd172610e023bc6d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e8cd24915dc4c7eac076830d84bf35e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "114ccd8e9c8449738caee4dfaba0b9bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { 
+ "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1223e7fe38b04c88b4632e38b340fb9c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "164fd5a9aa564d2ca5a7e70a869cfe5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + 
"description_width": "" + } + }, + "241cfd171c5e4ec4b35df40cbf06b8d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "254f9c127d034fefa6fe822135dff282": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "266170876ef945d1b120b077c99269f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2ca42f17eb804ff9a3930a50261d1a29", + "placeholder": "​", + "style": "IPY_MODEL_e7767e9b1b9248208a505fc47c6ac22b", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + "27a130390ba549dd87e37ad6a48ae295": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2bc7dd17871a4997a65520b197b8c9b1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2ca42f17eb804ff9a3930a50261d1a29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "342f8a8110a4483f963257c74cf4b182": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_4c6b33e2902a4ae1aff0bdf162397c78", + "max": 333, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_241cfd171c5e4ec4b35df40cbf06b8d9", + "value": 333 + } + }, + "362e3880a616453cb8e30c05f5056600": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "387f28a7ebad434298f6d3389be2dccb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8d33ffa462c641dc9d937bd5d0388674", + "placeholder": "​", + 
"style": "IPY_MODEL_e0692e0e66394689acf97322e94313d1", + "value": "Downloading (…)/main/tokenizer.json: 100%" + } + }, + "3af968c9b0914ee7a0886bd22f0a45df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_496a9fa614e34ac9a88eedcc0bdddf5c", + "placeholder": "​", + "style": "IPY_MODEL_52febd9de1c24a89bc22fff9744da2bd", + "value": " 112/112 [00:00<00:00, 3.77kB/s]" + } + }, + "4224a031df984fdc935e24b42334e2ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fe385707c3f74cf8b8a29a05b84ade5c", + "IPY_MODEL_5797e1b2ae2443009a8d3c6046259ba5", + "IPY_MODEL_ac6f0c3c83ba48cd89a1873f44b16585" + ], + "layout": "IPY_MODEL_992d3a415cf541c49a4b1f70467850c2" + } + }, + "42e2d470fbd641d5b743641db8c886ef": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44d4e553878747369961b61f4f15be80": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "496a9fa614e34ac9a88eedcc0bdddf5c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c6b33e2902a4ae1aff0bdf162397c78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "516e39a0cf4e4fc09cf09da28f6bee97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_362e3880a616453cb8e30c05f5056600", + "placeholder": "​", + "style": "IPY_MODEL_5aa9452586654839a9cbb0a2a4f236e2", + "value": " 232k/232k [00:00<00:00, 3.27MB/s]" + } + }, + "52febd9de1c24a89bc22fff9744da2bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "534f1061303e45c2af004258b347f563": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2bc7dd17871a4997a65520b197b8c9b1", + "placeholder": "​", + "style": "IPY_MODEL_bad9ccd0bc6b478e9629bd4fbe2ad173", + "value": " 466k/466k [00:00<00:00, 8.95MB/s]" + } + }, + "5428a75e2fd24cc08740d8fbe6dbe66f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"568aeab42d2e42c1afff49331259e44c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0967da2f32004a969cc121045548a48d", + "placeholder": "​", + "style": "IPY_MODEL_44d4e553878747369961b61f4f15be80", + "value": " 735/735 [00:00<00:00, 32.7kB/s]" + } + }, + "5797e1b2ae2443009a8d3c6046259ba5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a4d8a97c7e194df3b17682c0a7d78578", + "max": 267860081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_164fd5a9aa564d2ca5a7e70a869cfe5d", + "value": 267860081 + } + }, + "5aa9452586654839a9cbb0a2a4f236e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "61941e4dd77c4bb3b744017a5bbec9ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1223e7fe38b04c88b4632e38b340fb9c", + "max": 735, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c037220a858d44e6967f8ab318da5e19", + "value": 735 + } + }, + "6587cde91bea403b9a12091d68834dde": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6d418e239b9241f2854dd6b89f8e895a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_feb1ed3bd3cf4b6a8855c47b2827c8ed", + "placeholder": "​", + "style": "IPY_MODEL_114ccd8e9c8449738caee4dfaba0b9bf", + "value": " 333/333 [00:00<00:00, 6.01kB/s]" + } + }, + "7233d1441540421c9c2843b9205a925c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_42e2d470fbd641d5b743641db8c886ef", + "placeholder": "​", + "style": "IPY_MODEL_5428a75e2fd24cc08740d8fbe6dbe66f", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "7a4b23c0d420415a9372588849fc596a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eac5c25c2a074d91a1c3d1140784fa42", + "placeholder": "​", + "style": "IPY_MODEL_254f9c127d034fefa6fe822135dff282", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "7aef86a276f945a7b59633462dcda0dc": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fc19730e6db34f46a4e3946320c27b20", + "max": 466132, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a1421d1aa0eb4c74ba030a9882a462d0", + "value": 466132 + } + }, + "8d33ffa462c641dc9d937bd5d0388674": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "91f77d5b775448cfb4fd80400b1ce67d": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "96e35138ac394407893e5a392d3f15c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_dcdb8dcd06024ae389cf42f9146ec760", + "IPY_MODEL_b034e2c7e67a49729cc2e27172c7e12c", + "IPY_MODEL_516e39a0cf4e4fc09cf09da28f6bee97" + ], + "layout": "IPY_MODEL_f4f09ec4d9624deaacdeaeb0efa4ac21" + } + }, + "992d3a415cf541c49a4b1f70467850c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a8d04a738f54a83842591462144de48": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a1421d1aa0eb4c74ba030a9882a462d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": 
null, + "description_width": "" + } + }, + "a4d8a97c7e194df3b17682c0a7d78578": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5af7cb3a3504a27bfbcb8067e22baa1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7233d1441540421c9c2843b9205a925c", + "IPY_MODEL_61941e4dd77c4bb3b744017a5bbec9ad", + "IPY_MODEL_568aeab42d2e42c1afff49331259e44c" + ], + "layout": "IPY_MODEL_e3f2d97226fc46919f01b4ee0e2c6b0e" + } + }, + 
"ac6f0c3c83ba48cd89a1873f44b16585": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0a58308a1a604bd0bd172610e023bc6d", + "placeholder": "​", + "style": "IPY_MODEL_91f77d5b775448cfb4fd80400b1ce67d", + "value": " 268M/268M [00:01<00:00, 145MB/s]" + } + }, + "b034e2c7e67a49729cc2e27172c7e12c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_048af93849664d89afd4ec54677e2480", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0e8cd24915dc4c7eac076830d84bf35e", + "value": 231508 + } + }, + "bad9ccd0bc6b478e9629bd4fbe2ad173": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c037220a858d44e6967f8ab318da5e19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", 
+ "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c788ae4ea7f84b698bcf3ecf936ffa79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c7d68d0d64fb4807b081aa4af533a6ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7a4b23c0d420415a9372588849fc596a", + "IPY_MODEL_342f8a8110a4483f963257c74cf4b182", + "IPY_MODEL_6d418e239b9241f2854dd6b89f8e895a" + ], + "layout": "IPY_MODEL_dfb27bc830f042729e0a2ac17f6f28eb" + } + }, + "cdccebee635e4570b27eec736c601860": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "da3ac361f4c54564b464f6ecb78e7106": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_266170876ef945d1b120b077c99269f5", + "IPY_MODEL_fbff364c69214d9dab368b822782e7dc", + "IPY_MODEL_3af968c9b0914ee7a0886bd22f0a45df" + ], + "layout": "IPY_MODEL_de39f19307ca4a10929f77448be713e0" + } + }, + "da5ac6f03ff44bd3ad0240c8362a998b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "dcdb8dcd06024ae389cf42f9146ec760": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cdccebee635e4570b27eec736c601860", + "placeholder": "​", + "style": "IPY_MODEL_fd718efc7f8c4af0b93e66d7d3163026", + "value": "Downloading (…)solve/main/vocab.txt: 100%" + } + }, + "de39f19307ca4a10929f77448be713e0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dfb27bc830f042729e0a2ac17f6f28eb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e0692e0e66394689acf97322e94313d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e3f2d97226fc46919f01b4ee0e2c6b0e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7767e9b1b9248208a505fc47c6ac22b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eac5c25c2a074d91a1c3d1140784fa42": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4f09ec4d9624deaacdeaeb0efa4ac21": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fbff364c69214d9dab368b822782e7dc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_27a130390ba549dd87e37ad6a48ae295", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_da5ac6f03ff44bd3ad0240c8362a998b", + "value": 112 + } + }, + "fc19730e6db34f46a4e3946320c27b20": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd718efc7f8c4af0b93e66d7d3163026": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", 
+ "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fe385707c3f74cf8b8a29a05b84ade5c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6587cde91bea403b9a12091d68834dde", + "placeholder": "​", + "style": "IPY_MODEL_c788ae4ea7f84b698bcf3ecf936ffa79", + "value": "Downloading pytorch_model.bin: 100%" + } + }, + "feb1ed3bd3cf4b6a8855c47b2827c8ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": 
null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From f1a4c78e8e26999a6f95fa35810fb1e20b8dbc2c Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 18 Aug 2023 19:01:25 +0530 Subject: [PATCH 22/24] updated docstring and added tests --- langtest/datahandler/datasource.py | 14 ++++++++++---- tests/test_datasource.py | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 6b52e4c58..89e40fade 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -465,7 +465,7 @@ class CSVDataset(_IDataset): Attributes: _file_path (Union[str, Dict]): - The path to the data file or a dictionary containing "name" key with the path. + The path to the data file or a dictionary containing "data_source" key with the path. task (str): Specifies the task of the dataset, which can be either "text-classification","ner" "question-answering" and "summarization". @@ -479,10 +479,16 @@ def __init__(self, file_path: Union[str, Dict], task: str, **kwargs) -> None: Args: file_path (Union[str, Dict]): - The path to the data file or a dictionary containing "data_source" key with the path + The path to the data file or a dictionary containing the following keys: + - "data_source": The path to the data file. + - "feature_column" (optional): Specifies the column containing input features. + - "target_column" (optional): Specifies the column containing target labels. task (str): - Specifies the task of the dataset, which can be either "text-classification","ner" - "question-answering" and "summarization". 
+ Specifies the task of the dataset, which can be one of the following: + - "text-classification" + - "ner" (Named Entity Recognition) + - "question-answering" + - "summarization" **kwargs: Additional keyword arguments that can be used to configure the dataset (optional). """ diff --git a/tests/test_datasource.py b/tests/test_datasource.py index e502ae16a..fe75d1d36 100644 --- a/tests/test_datasource.py +++ b/tests/test_datasource.py @@ -81,6 +81,17 @@ def test_load_raw_data(self, dataset, feature_col, target_col): }, ), (CSVDataset(file_path="tests/fixtures/tner.csv", task="ner"), {}), + ( + CSVDataset( + file_path={ + "data_source": "tests/fixtures/tner.csv", + "feature_column": "tokens", + "target_column": "ner_tags", + }, + task="ner", + ), + {}, + ), (ConllDataset(file_path="tests/fixtures/test.conll", task="ner"), {}), ], ) @@ -153,6 +164,17 @@ def test_export_data_conll(self): "text", "label", ), + ( + CSVDataset( + file_path={ + "data_source": "tests/fixtures/text_classification.csv", + "feature_column": "text", + "target_column": "label", + }, + task="text-classification", + ), + {}, + ), ( HuggingFaceDataset(dataset_name="dbrd", task="text-classification"), "text", From 9279cde564a5bf1db12216b2dc2b6ebdeb89e4fa Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 18 Aug 2023 19:07:39 +0530 Subject: [PATCH 23/24] updated test_datasource.py --- tests/test_datasource.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_datasource.py b/tests/test_datasource.py index fe75d1d36..1b48bbcaa 100644 --- a/tests/test_datasource.py +++ b/tests/test_datasource.py @@ -173,7 +173,8 @@ def test_export_data_conll(self): }, task="text-classification", ), - {}, + "text", + "label", ), ( HuggingFaceDataset(dataset_name="dbrd", task="text-classification"), From 0f904dd52aa5c03ac031269d3d7297f712dab4cf Mon Sep 17 00:00:00 2001 From: Prikshit7766 Date: Fri, 18 Aug 2023 19:57:10 +0530 Subject: [PATCH 24/24] updated load_raw_data for csv --- 
langtest/datahandler/datasource.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 89e40fade..692415dc1 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -520,7 +520,32 @@ def load_raw_data(self, standardize_columns: bool = False) -> List[Dict]: List[Dict]: parsed CSV file into list of dicts """ - df = pd.read_csv(self._file_path) + + if type(self._file_path) == dict: + df = pd.read_csv(self._file_path["data_source"]) + + if self.task == "text-classification": + feature_column = self._file_path.get("feature_column", "text") + target_column = self._file_path.get("target_column", "label") + elif self.task == "ner": + feature_column = self._file_path.get("feature_column", "text") + target_column = self._file_path.get("target_column", "ner") + + if feature_column not in df.columns or target_column not in df.columns: + raise ValueError( + f"Columns '{feature_column}' and '{target_column}' not found in the dataset." + ) + + if self.task == "text-classification": + df.rename( + columns={feature_column: "text", target_column: "label"}, inplace=True + ) + elif self.task == "ner": + df.rename( + columns={feature_column: "text", target_column: "ner"}, inplace=True + ) + else: + df = pd.read_csv(self._file_path) raw_data = [] if not standardize_columns: