From 71f218350aedeb78c39b999b8a104900c2c9b503 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 1 May 2024 19:31:54 +0530 Subject: [PATCH 01/16] Basic Implementation of prompt techniques handles from config. --- langtest/prompts.py | 95 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 langtest/prompts.py diff --git a/langtest/prompts.py b/langtest/prompts.py new file mode 100644 index 000000000..4c9d21d0a --- /dev/null +++ b/langtest/prompts.py @@ -0,0 +1,95 @@ +from typing import Any, List, Union + +from pydantic import BaseModel, Extra, validator + + +class MessageType(BaseModel): + + __field_order: List[str] = [ + "content", + "context", + "question", + "options", + "answer" + ] + + class Config: + extra = Extra.allow # Allow any additional fields that are not explicitly declared + + @validator('*', pre=True, allow_reuse=True) + def add_field(cls, v, values, field, **kwargs): + if 'fields' not in values: + values['fields'] = [] + values['fields'].append(field) + return v + + @property + def get_template(self): + """Generate a template string based on the dynamic fields of the instance.""" + + temp = [] + for field in self.__field_order: + if field in self.__dict__: + temp.append(f"{field.title()}: {{{field}}}") + return "\n"+"\n".join(temp) + + + @property + def get_example(self): + """Generate an example string based on the dynamic fields of the instance.""" + # return {k: v for k, v in self.__dict__.items() if k != 'fields'} + temp = {} + for field in self.__field_order: + if field in self.__dict__: + temp[field] = self.__dict__[field] + return temp + + +class Conversion(BaseModel): + """Conversion model for the conversion of the input and output of the model.""" + + user: MessageType + ai: MessageType + + class Config: + extra = Extra.allow # Allow any additional fields that are not explicitly declared + + @validator('*', pre=True, allow_reuse=True) + def add_field(cls, v, values, field, **kwargs): 
+ if 'fields' not in values: + values['fields'] = [] + values['fields'].append(field) + return v + + @property + def get_examples(self): + """Generate a list of examples based on the dynamic fields of the instance.""" + return { + **self.user.get_example, + **self.ai.get_example + } + + +class PromptConfig(BaseModel): + + instructions: str + prompt_type: str + examples: Union[Conversion, List[Conversion]] = None + + @property + def get_examples(self): + """Generate a list of examples based on the dynamic fields of the instance.""" + if isinstance(self.examples, Conversion): + return [self.examples.get_examples] + elif isinstance(self.examples, list): + return [example.get_examples for example in self.examples] + return self.examples.get_examples + + @property + def get_template(self): + """Generate a template string based on the dynamic fields of the instance.""" + if isinstance(self.examples, Conversion): + return self.examples.user.get_template + elif isinstance(self.examples, list): + return [('user', self.examples[0].user.get_template), ('ai', self.examples[0].ai.get_template)] + # return self.examples.get_template \ No newline at end of file From 4160fea9d6007f3197a3e1b042b0680a6f8fcccb Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 1 May 2024 20:13:47 +0530 Subject: [PATCH 02/16] Refactor field order in MessageType and Conversion classes in prompts.py --- langtest/prompts.py | 55 +++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 4c9d21d0a..617e68687 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -4,35 +4,29 @@ class MessageType(BaseModel): - - __field_order: List[str] = [ - "content", - "context", - "question", - "options", - "answer" - ] + __field_order: List[str] = ["content", "context", "question", "options", "answer"] class Config: - extra = Extra.allow # Allow any additional fields that are not explicitly declared + 
extra = ( + Extra.allow + ) # Allow any additional fields that are not explicitly declared - @validator('*', pre=True, allow_reuse=True) + @validator("*", pre=True, allow_reuse=True) def add_field(cls, v, values, field, **kwargs): - if 'fields' not in values: - values['fields'] = [] - values['fields'].append(field) + if "fields" not in values: + values["fields"] = [] + values["fields"].append(field) return v @property def get_template(self): """Generate a template string based on the dynamic fields of the instance.""" - + temp = [] for field in self.__field_order: if field in self.__dict__: temp.append(f"{field.title()}: {{{field}}}") - return "\n"+"\n".join(temp) - + return "\n" + "\n".join(temp) @property def get_example(self): @@ -43,7 +37,7 @@ def get_example(self): if field in self.__dict__: temp[field] = self.__dict__[field] return temp - + class Conversion(BaseModel): """Conversion model for the conversion of the input and output of the model.""" @@ -52,26 +46,24 @@ class Conversion(BaseModel): ai: MessageType class Config: - extra = Extra.allow # Allow any additional fields that are not explicitly declared + extra = ( + Extra.allow + ) # Allow any additional fields that are not explicitly declared - @validator('*', pre=True, allow_reuse=True) + @validator("*", pre=True, allow_reuse=True) def add_field(cls, v, values, field, **kwargs): - if 'fields' not in values: - values['fields'] = [] - values['fields'].append(field) + if "fields" not in values: + values["fields"] = [] + values["fields"].append(field) return v @property def get_examples(self): """Generate a list of examples based on the dynamic fields of the instance.""" - return { - **self.user.get_example, - **self.ai.get_example - } + return {**self.user.get_example, **self.ai.get_example} class PromptConfig(BaseModel): - instructions: str prompt_type: str examples: Union[Conversion, List[Conversion]] = None @@ -84,12 +76,15 @@ def get_examples(self): elif isinstance(self.examples, list): return 
[example.get_examples for example in self.examples] return self.examples.get_examples - + @property def get_template(self): """Generate a template string based on the dynamic fields of the instance.""" if isinstance(self.examples, Conversion): return self.examples.user.get_template elif isinstance(self.examples, list): - return [('user', self.examples[0].user.get_template), ('ai', self.examples[0].ai.get_template)] - # return self.examples.get_template \ No newline at end of file + return [ + ("user", self.examples[0].user.get_template), + ("ai", self.examples[0].ai.get_template), + ] + # return self.examples.get_template From 38c4749b7d203a9232fa995440d7f217b0d123af Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 1 May 2024 20:32:09 +0530 Subject: [PATCH 03/16] lint fix --- langtest/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 617e68687..071f91900 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -1,4 +1,4 @@ -from typing import Any, List, Union +from typing import List, Union from pydantic import BaseModel, Extra, validator From 06c2ef69f85ff4405c46293e5f11956f4c096248 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Fri, 3 May 2024 20:19:30 +0530 Subject: [PATCH 04/16] Refactor field order in MessageType and Conversion classes in prompts.py --- langtest/prompts.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 071f91900..4b260cda4 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -4,7 +4,7 @@ class MessageType(BaseModel): - __field_order: List[str] = ["content", "context", "question", "options", "answer"] + __field_order: List[str] = ["content", "context", "question", "original", "testcase", "options", "answer"] class Config: extra = ( @@ -88,3 +88,36 @@ def get_template(self): ("ai", self.examples[0].ai.get_template), ] # return 
self.examples.get_template + + def prompt_style(self): + + if self.prompt_type == "chat": + from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate + + few_shot_prompt = FewShotChatMessagePromptTemplate( + examples=self.get_examples, + example_prompt=self.get_template, + ) + + final_prompt = ChatPromptTemplate.from_messages( + [ + ('system', self.instructions), + few_shot_prompt, + # ('human', conf), + self.get_template[0], + ] + ) + return final_prompt + + elif self.prompt_type == "normal": + from langchain.prompts import FewShotPromptTemplate, PromptTemplate + + example = PromptTemplate.from_template(self.get_template) + + final_prompt = FewShotPromptTemplate( + examples=self.examples, + example_selector=example, + ) + + + From 6e6216cd28ac0f1107df80aa6cc9c330caaa2c0c Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Fri, 3 May 2024 21:01:46 +0530 Subject: [PATCH 05/16] fixed lint --- langtest/prompts.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 4b260cda4..db56ae68b 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -4,7 +4,15 @@ class MessageType(BaseModel): - __field_order: List[str] = ["content", "context", "question", "original", "testcase", "options", "answer"] + __field_order: List[str] = [ + "content", + "context", + "question", + "original", + "testcase", + "options", + "answer", + ] class Config: extra = ( @@ -88,11 +96,13 @@ def get_template(self): ("ai", self.examples[0].ai.get_template), ] # return self.examples.get_template - - def prompt_style(self): + def prompt_style(self): if self.prompt_type == "chat": - from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate + from langchain.prompts import ( + ChatPromptTemplate, + FewShotChatMessagePromptTemplate, + ) few_shot_prompt = FewShotChatMessagePromptTemplate( examples=self.get_examples, @@ -101,7 +111,7 @@ def prompt_style(self): 
final_prompt = ChatPromptTemplate.from_messages( [ - ('system', self.instructions), + ("system", self.instructions), few_shot_prompt, # ('human', conf), self.get_template[0], @@ -118,6 +128,3 @@ def prompt_style(self): examples=self.examples, example_selector=example, ) - - - From 21ae26b8d1f7e6c251dd0c8852b9fa8ea81fcc3c Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 6 May 2024 11:28:52 +0530 Subject: [PATCH 06/16] improved to get prompt based on the style like chat or instruct --- langtest/prompts.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index db56ae68b..a3943d578 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -46,6 +46,14 @@ def get_example(self): temp[field] = self.__dict__[field] return temp + @property + def input_variables(self): + temp = [] + for field in self.__field_order: + if field in self.__dict__: + temp.append(field) + return temp + class Conversion(BaseModel): """Conversion model for the conversion of the input and output of the model.""" @@ -92,21 +100,32 @@ def get_template(self): return self.examples.user.get_template elif isinstance(self.examples, list): return [ - ("user", self.examples[0].user.get_template), + ("human", self.examples[0].user.get_template), ("ai", self.examples[0].ai.get_template), ] # return self.examples.get_template + @property + def get_input_variables(self): + """Generate a list of input variables based on the dynamic fields of the instance.""" + if isinstance(self.examples, Conversion): + return self.examples.user.input_variables + elif isinstance(self.examples, list): + return self.examples[0].user.input_variables + def prompt_style(self): + """Generate a prompt based on the prompt type.""" if self.prompt_type == "chat": from langchain.prompts import ( ChatPromptTemplate, FewShotChatMessagePromptTemplate, ) + example_prompt = ChatPromptTemplate.from_messages(self.get_template) + few_shot_prompt 
= FewShotChatMessagePromptTemplate( examples=self.get_examples, - example_prompt=self.get_template, + example_prompt=example_prompt, ) final_prompt = ChatPromptTemplate.from_messages( @@ -119,7 +138,7 @@ def prompt_style(self): ) return final_prompt - elif self.prompt_type == "normal": + elif self.prompt_type == "instruct": from langchain.prompts import FewShotPromptTemplate, PromptTemplate example = PromptTemplate.from_template(self.get_template) @@ -128,3 +147,6 @@ def prompt_style(self): examples=self.examples, example_selector=example, ) + + def get_prompt(self): + return self.prompt_style() From 649a5dc5fbaefbe609c70e18079b7b91134fbc8e Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 6 May 2024 13:46:22 +0530 Subject: [PATCH 07/16] Handle the prompts for mulitple datasets. --- langtest/prompts.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/langtest/prompts.py b/langtest/prompts.py index a3943d578..84e579e34 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -150,3 +150,37 @@ def prompt_style(self): def get_prompt(self): return self.prompt_style() + + +class PromptManager: + _instance = None + prompt_configs = {} + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance.prompt_configs = {} + return cls._instance + + @classmethod + def from_prompt_configs(cls, prompt_configs: dict): + """Create a prompt manager from a dictionary of prompt configurations.""" + prompt_manager = cls() + for name, prompt_config in prompt_configs.items(): + prompt_manager.add_prompt(name, prompt_config) + return prompt_manager + + def add_prompt(self, name, prompt_config): + """Add a prompt template to the prompt manager.""" + self.prompt_configs[name] = prompt_config + + def get_prompt(self, name): + """Get a prompt template based on the name.""" + prompt_template = PromptConfig(**self.prompt_configs[name]).get_prompt() + return prompt_template + + def 
reset(self): + """Reset the prompt manager to its initial state.""" + self.prompt_configs = {} + self._instance = None + return self From 73f16e6b36cd9ebaa16cf086649cb693855ec1f0 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 7 May 2024 12:28:25 +0530 Subject: [PATCH 08/16] Refactor prompt manager to handle default prompt configuration --- langtest/prompts.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 84e579e34..673f5e404 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -154,7 +154,9 @@ def get_prompt(self): class PromptManager: _instance = None - prompt_configs = {} + prompt_configs = { + "default": "This is a default prompt configuration.", + } def __new__(cls, *args, **kwargs): if cls._instance is None: @@ -166,15 +168,20 @@ def __new__(cls, *args, **kwargs): def from_prompt_configs(cls, prompt_configs: dict): """Create a prompt manager from a dictionary of prompt configurations.""" prompt_manager = cls() + if ["instructions", "prompt_type", "examples"] in prompt_configs.keys(): + prompt_manager.add_prompt("default", prompt_configs) + return prompt_manager for name, prompt_config in prompt_configs.items(): prompt_manager.add_prompt(name, prompt_config) return prompt_manager def add_prompt(self, name, prompt_config): """Add a prompt template to the prompt manager.""" + if ["instructions", "prompt_type", "examples"] not in prompt_config.keys(): + self.prompt_configs["default"] = prompt_config self.prompt_configs[name] = prompt_config - def get_prompt(self, name): + def get_prompt(self, name="default"): """Get a prompt template based on the name.""" prompt_template = PromptConfig(**self.prompt_configs[name]).get_prompt() return prompt_template From db353338e8edf7bcad34b94acd65b64d0cf3b7f5 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 7 May 2024 13:56:02 +0530 Subject: [PATCH 09/16] Integrated with model_handler and prompt manager. 
--- langtest/langtest.py | 13 +++++++++++ langtest/modelhandler/llm_modelhandler.py | 11 ++++++++- langtest/prompts.py | 28 ++++++++++++++++------- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index b5ba0d90f..03c79a597 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -13,6 +13,8 @@ from pkg_resources import resource_filename +from langtest.prompts import PromptManager + from .tasks import TaskManager from .augmentation import AugmentRobustness, TemplaticAugment from .datahandler.datasource import DataFactory @@ -181,6 +183,10 @@ def __init__( resource_filename("langtest", "data/config.yml") ) + # prompt config + self.__prompt_config = self._config.get("prompt_config", None) + self.prompt_manager = PromptManager.from_prompt_configs(self.__prompt_config) + # model section if isinstance(model, list): model_dict = {} @@ -1579,6 +1585,9 @@ def __multi_datasets_run( # Run the testcases for each dataset for dataset_name, samples in testcases.items(): + # set prompt in prompt manager + if self.prompt_manager is not None: + self.prompt_manager.default_state = dataset_name # update user prompt for each dataset if temp_store_prompt and isinstance(temp_store_prompt, dict): self._config.get("model_parameters", {}).update( @@ -1627,3 +1636,7 @@ def __reset_defaults(self): """Reset the default values.""" model_response = TestResultManager() model_response.clear_data() + + # Reset the PromptManager + prompt_manager = PromptManager() + prompt_manager.reset() diff --git a/langtest/modelhandler/llm_modelhandler.py b/langtest/modelhandler/llm_modelhandler.py index b0e56a56f..bc5f74d95 100644 --- a/langtest/modelhandler/llm_modelhandler.py +++ b/langtest/modelhandler/llm_modelhandler.py @@ -136,7 +136,16 @@ def predict(self, text: Union[str, dict], prompt: dict, *args, **kwargs): The prediction result. 
""" try: - prompt_template = PromptTemplate(**prompt) + # loading a prompt manager + from langtest.prompts import PromptManager + + prompt_manager = PromptManager() + + prompt_template = prompt_manager.get_prompt() + + if prompt_template is None: + prompt_template = PromptTemplate(**prompt) + llmchain = LLMChain(prompt=prompt_template, llm=self.model) output = llmchain.invoke(text) return output.get(llmchain.output_key, "") diff --git a/langtest/prompts.py b/langtest/prompts.py index 673f5e404..bcbf200ac 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -154,9 +154,8 @@ def get_prompt(self): class PromptManager: _instance = None - prompt_configs = { - "default": "This is a default prompt configuration.", - } + prompt_configs = {} + _default_state = None def __new__(cls, *args, **kwargs): if cls._instance is None: @@ -168,24 +167,37 @@ def __new__(cls, *args, **kwargs): def from_prompt_configs(cls, prompt_configs: dict): """Create a prompt manager from a dictionary of prompt configurations.""" prompt_manager = cls() - if ["instructions", "prompt_type", "examples"] in prompt_configs.keys(): + if set(["instructions", "prompt_type", "examples"]).issubset( + set(prompt_configs.keys()) + ): prompt_manager.add_prompt("default", prompt_configs) return prompt_manager for name, prompt_config in prompt_configs.items(): prompt_manager.add_prompt(name, prompt_config) + + if len(prompt_manager.prompt_configs) == 1: + prompt_manager.default_state = list(prompt_manager.prompt_configs.keys())[0] return prompt_manager - def add_prompt(self, name, prompt_config): + def add_prompt(self, name: str, prompt_config: dict): """Add a prompt template to the prompt manager.""" - if ["instructions", "prompt_type", "examples"] not in prompt_config.keys(): - self.prompt_configs["default"] = prompt_config self.prompt_configs[name] = prompt_config - def get_prompt(self, name="default"): + def get_prompt(self, name: str = None): """Get a prompt template based on the name.""" + if name 
is None: + name = self.default_state prompt_template = PromptConfig(**self.prompt_configs[name]).get_prompt() return prompt_template + @property + def default_state(self): + return self._default_state + + @default_state.setter + def default_state(self, name: str): + self._default_state = name + def reset(self): """Reset the prompt manager to its initial state.""" self.prompt_configs = {} From ca0a4c1d8b4044fb8bc68f2b97e72ab51d168522 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 7 May 2024 14:08:04 +0530 Subject: [PATCH 10/16] error handling, when `prompt_config` is not available in config --- langtest/langtest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 03c79a597..e46074421 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -185,7 +185,8 @@ def __init__( # prompt config self.__prompt_config = self._config.get("prompt_config", None) - self.prompt_manager = PromptManager.from_prompt_configs(self.__prompt_config) + if self.__prompt_config: + self.prompt_manager = PromptManager.from_prompt_configs(self.__prompt_config) # model section if isinstance(model, list): @@ -1586,7 +1587,7 @@ def __multi_datasets_run( # Run the testcases for each dataset for dataset_name, samples in testcases.items(): # set prompt in prompt manager - if self.prompt_manager is not None: + if hasattr(self, "prompt_manager") and self.prompt_manager is not None: self.prompt_manager.default_state = dataset_name # update user prompt for each dataset if temp_store_prompt and isinstance(temp_store_prompt, dict): From e1cb02a7a9058bb0c72e8b6116ad66ac8354269d Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 8 May 2024 12:03:58 +0530 Subject: [PATCH 11/16] improve the prompt handling for instruct models --- langtest/prompts.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index bcbf200ac..5c368f4ef 
100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -78,6 +78,11 @@ def get_examples(self): """Generate a list of examples based on the dynamic fields of the instance.""" return {**self.user.get_example, **self.ai.get_example} + @property + def get_suffix_user(self): + if self.user.get_template: + return self.user.get_template + class PromptConfig(BaseModel): instructions: str @@ -141,13 +146,23 @@ def prompt_style(self): elif self.prompt_type == "instruct": from langchain.prompts import FewShotPromptTemplate, PromptTemplate - example = PromptTemplate.from_template(self.get_template) + template = "".join(v for _, v in self.get_template) + template = f"{template.replace('Answer:', '')}" + examples = [v.get_examples for v in self.examples] + suffix = self.examples[0].get_suffix_user + + example = PromptTemplate.from_template(template) final_prompt = FewShotPromptTemplate( - examples=self.examples, - example_selector=example, + examples=examples, + example_prompt=example, + input_variables=self.get_input_variables, + suffix=suffix, + prefix=self.instructions, ) + return final_prompt + def get_prompt(self): return self.prompt_style() From cb0c2edb91231d0b7b868813f096bdd8a9f2e73e Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 8 May 2024 13:23:45 +0530 Subject: [PATCH 12/16] Refactor prompt manager to handle default prompt configuration --- langtest/prompts.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 5c368f4ef..0a67f27f2 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -1,4 +1,5 @@ -from typing import List, Union +from collections import defaultdict +from typing import Dict, List, Union from pydantic import BaseModel, Extra, validator @@ -166,16 +167,19 @@ def prompt_style(self): def get_prompt(self): return self.prompt_style() + def get_shot_prompt(self): + return f"{len(self.get_examples)}-shot {self.prompt_type} prompt" + class 
PromptManager: _instance = None - prompt_configs = {} + prompt_configs: Dict[str, PromptConfig] = defaultdict(PromptConfig) _default_state = None def __new__(cls, *args, **kwargs): if cls._instance is None: cls._instance = super().__new__(cls) - cls._instance.prompt_configs = {} + cls._instance.prompt_configs = defaultdict(PromptConfig) return cls._instance @classmethod @@ -196,13 +200,14 @@ def from_prompt_configs(cls, prompt_configs: dict): def add_prompt(self, name: str, prompt_config: dict): """Add a prompt template to the prompt manager.""" - self.prompt_configs[name] = prompt_config + prompt_config_o = PromptConfig(**prompt_config) + self.prompt_configs[name] = prompt_config_o def get_prompt(self, name: str = None): """Get a prompt template based on the name.""" if name is None: name = self.default_state - prompt_template = PromptConfig(**self.prompt_configs[name]).get_prompt() + prompt_template = self.prompt_configs[name].get_prompt() return prompt_template @property @@ -213,8 +218,12 @@ def default_state(self): def default_state(self, name: str): self._default_state = name + @property + def get_prompt_shot(self): + return self.get_prompt().get_shot_prompt() + def reset(self): """Reset the prompt manager to its initial state.""" - self.prompt_configs = {} + self.prompt_configs = defaultdict(PromptConfig) self._instance = None return self From f5d2c91cea00f695160be4a10800408be1391ea9 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 8 May 2024 15:12:53 +0530 Subject: [PATCH 13/16] improved in the lm studio --- .../modelhandler/lmstudio_modelhandler.py | 32 +++++++++++---- langtest/prompts.py | 39 ++++++++++++++++--- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/langtest/modelhandler/lmstudio_modelhandler.py b/langtest/modelhandler/lmstudio_modelhandler.py index 5042b29ff..fbcc59f4f 100644 --- a/langtest/modelhandler/lmstudio_modelhandler.py +++ b/langtest/modelhandler/lmstudio_modelhandler.py @@ -1,5 +1,7 @@ from typing import 
Any, Callable, Union +from langtest.prompts import PromptManager + from .modelhandler import ModelAPI from abc import ABC from functools import lru_cache @@ -45,13 +47,20 @@ def chat_completion_api(text: str, url: str, server_prompt: str, **kwargs): input_data_func = kwargs.get("data") data = input_data_func(text) else: - if isinstance(server_prompt, str): - server_prompt = {"role": "assistant", "content": server_prompt} + examples = [] user_text = {"role": "user", "content": text} + + if kwargs.get("examples", None) and isinstance(kwargs.get("examples"), list): + examples.extend(kwargs.get("examples", [])) + + if isinstance(server_prompt, str): + server_prompt = {"role": "system", "content": server_prompt} + # user_text = {"role": "user", "content": text} + messages = [*examples, user_text] + # if server_prompt: + # messages.insert(0, server_prompt) data = { - "messages": [*server_prompt, user_text] - if isinstance(server_prompt, tuple) - else [server_prompt, user_text], + "messages": messages, "temperature": kwargs.get("temperature", 0.2), "max_tokens": kwargs.get("max_tokens", -1), "stream": kwargs.get("stream", False), @@ -152,12 +161,21 @@ def predict( str: The predicted output. 
""" try: - prompt_template = SimplePromptTemplate(**prompt) - p = prompt_template.format(**text) + prompt_examples = PromptManager() + examples = prompt_examples.get_prompt(hub="lm-studio") + + if examples: + prompt["template"] = "".join(f"{k.title()}: {{{k}}}" for k in text.keys()) + prompt_template = SimplePromptTemplate(**prompt) + p = prompt_template.format(**text) + else: + prompt_template = SimplePromptTemplate(**prompt) + p = prompt_template.format(**text) op = chat_completion_api( text=p, url=self.model, server_prompt=server_prompt, + examples=examples, *args, **self.kwargs, ) diff --git a/langtest/prompts.py b/langtest/prompts.py index 0a67f27f2..6779c4de1 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -91,13 +91,12 @@ class PromptConfig(BaseModel): examples: Union[Conversion, List[Conversion]] = None @property - def get_examples(self): + def get_examples(self) -> List[dict]: """Generate a list of examples based on the dynamic fields of the instance.""" if isinstance(self.examples, Conversion): return [self.examples.get_examples] elif isinstance(self.examples, list): return [example.get_examples for example in self.examples] - return self.examples.get_examples @property def get_template(self): @@ -164,11 +163,37 @@ def prompt_style(self): return final_prompt - def get_prompt(self): + def get_prompt(self, hub=None): + if hub == "lm-studio": + return self.lm_studio_prompt() return self.prompt_style() def get_shot_prompt(self): - return f"{len(self.get_examples)}-shot {self.prompt_type} prompt" + return f"{len(self.get_examples)}-shot prompt" + + def lm_studio_prompt(self): + messages = [ + {"role": "system", "content": self.instructions}, + ] + + for example in self.examples: + temp_user = {} + temp_ai = {} + + # user role + temp_user["role"] = "user" + temp_user["content"] = example.user.get_template.format( + **example.user.get_example + ) + + # assistant role + temp_ai["role"] = "assistant" + temp_ai["content"] = 
example.ai.get_template.format(**example.ai.get_example) + + messages.append(temp_user) + messages.append(temp_ai) + # return messages + return messages class PromptManager: @@ -203,11 +228,13 @@ def add_prompt(self, name: str, prompt_config: dict): prompt_config_o = PromptConfig(**prompt_config) self.prompt_configs[name] = prompt_config_o - def get_prompt(self, name: str = None): + def get_prompt(self, name: str = None, hub: str = None): """Get a prompt template based on the name.""" + if name is None and self.default_state is None: + return None if name is None: name = self.default_state - prompt_template = self.prompt_configs[name].get_prompt() + prompt_template = self.prompt_configs[name].get_prompt(hub) return prompt_template @property From 5e536eea14b6908b3acdc679925078b884a705a4 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 8 May 2024 15:39:51 +0530 Subject: [PATCH 14/16] support orderless field in MessageType object --- langtest/prompts.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/langtest/prompts.py b/langtest/prompts.py index 6779c4de1..870b451c4 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -32,9 +32,15 @@ def get_template(self): """Generate a template string based on the dynamic fields of the instance.""" temp = [] + order_less = [] for field in self.__field_order: + formatted = f"{field.title()}: {{{field}}}" if field in self.__dict__: - temp.append(f"{field.title()}: {{{field}}}") + temp.append(formatted) + else: + order_less.append(formatted) + if order_less: + temp.extend(order_less) return "\n" + "\n".join(temp) @property From 3b0712c4e08415a9f23be2bd6fdb38b6687096cf Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 9 May 2024 15:37:46 +0530 Subject: [PATCH 15/16] fix the prompt issues and nb update --- .../llm_notebooks/Fewshot_QA_Notebook.ipynb | 1392 +++++++++++++++++ langtest/prompts.py | 10 +- 2 files changed, 1397 insertions(+), 5 deletions(-) create mode 100644 
demo/tutorials/llm_notebooks/Fewshot_QA_Notebook.ipynb diff --git a/demo/tutorials/llm_notebooks/Fewshot_QA_Notebook.ipynb b/demo/tutorials/llm_notebooks/Fewshot_QA_Notebook.ipynb new file mode 100644 index 000000000..6b5385f16 --- /dev/null +++ b/demo/tutorials/llm_notebooks/Fewshot_QA_Notebook.ipynb @@ -0,0 +1,1392 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "e7PsSmy9sCoR" + }, + "source": [ + "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAUgAAABcCAYAAAAMJCwKAAAgAElEQVR4nOy9f5gcZ3Xn+znnra5pjcfKZCyNfqDIQgghZMdxZMfGxpbbwhjM2g4h2Ak/Nol3Aw5xEsLu5eHh8vCofNl9uFluLhiwhUi4zib3ZomcZBMgARsjt4RxbGIritcSsiyE0GpleSQLMYxHPd1V59w/qnq6Z6ZnNJJG/Ej6+zw9PW911fueeqvq1Pn9CucASZJokkzZaudirC666KKLcwWZ+y4TveyWJeW4/lKZYYD5mI2m8+YdH61Wk3Tux+uiiy66ODeYYwaZaKUysNSI7xSVtfj4MCPi9t8WLhzY+sADt9fndswuuuiii3ODaO66ShQSM7lvvYj8B6A8/pMIiM4/evToTuDI3I3ZRRdddHHuMIcMMocgC9ysFwx3DBzVyFzCQBpF8VyP10UXXXRxrjDnDBJygdFyl4wiTS3egJPnYrguuuiii3MCPRedem57NHBk3A6pwLxzMVwXXXTRxTnBnEmQSZJ/xP2gaDjhrv00vTSigB12tVqSJNrcf/p+uiFBXXTRxY8ec+7Fvuqq+f1RT/ktgl40PogwbKn/XQgv7KhUsJwBJjNIr10G2UUXXfzocU7iICsV9AfnL4k5nG85//zYKpXv1pMksStv+uT8eKy0RtyWqU9U8U1cU5e9Mb17qtU7anNPWxdddNHF7HEOGOTUTJpKBa1UsC271kYLjh79zyL6bnefP3F4b5JzxLEPvrhw4Z/v7sZMdtFFFz9CnBMGORW5On1V5YLVsUT/CNJrlnXcUzXg+JfU7c5K5ehQ1x7ZRRdd/KhwTsJ8JqMpTW7dzlJc+swykBZ3HpcdAfcMkVAGLVerKHl8UBdddNHFDx3nJMxn2sHMFYrEmrbtPyQxtosuuujitPBDlSDXbwgqDo4grUTtCRJkF1100cWPC+aIQc4uZMdMLAhtzDH/lo7KdhdddNHFjxZzwCATXbuWCNZO8/sWBgdfUvhuCh75hN8mM8P2djfKp4suuvjR4iwYZKLXvq7/YrGeD7jbIBxF3NskyZZ/JTc9LkyBBdP5XNxBwETV8OwwcKJSwarVM6ewiy666OJscEb6bJIkWq0uXOkS/ptqaZ1ZSqsoxQxwU/f28J7Jxzil6LwnG/aDD2zf+rtbz4S2Lrrooou5whlLkCa+LmjP8ix9KXUkEloWxBm+TaTwnDsmok+L6iHcIxcxaBzP0h98bnvlxe1szetLnu0JdtFFF12cKc6YQbprjLgiolKECzXlwVN9Fz2kmdumyPyhNLhGmRhEI9XqnceongFzLIpg0A0s76KLLuYILQaZJAobIZFZMphsgnQ4W7g7ICaAqp2oXHfs4K5dREePthsnZ2BySdPOWS2+K5bTvLG5rcsgu+iiizlBziCTRyIWDpY5ursO5PnPic8QunM3ofgvZ46T2eSp2tB04iRJYkmSpDOmFCau44x77e6II3GZ0s+U0bEyvq+PTc/2Ic8tw5fGJL
5l9ky+iy666GJ65AxyydJVuN7OYh/lM88OIQwjz42QygjKMJ6OYlajhzqhd5Q7qFPJO/Ai7Lv5fx7VOHO7CfdZZPJsPtwLe9fxmb2D4H286IuJWYTqAvS8BbgsRmwAGCTL9gFb5mhuuuiii3/lyBlkqsuZN+8OsvogIaqhOgqhRikbJUtHca2TpaM0pE5afzBJNn5m/bb7VGkP8p74/3TtcSapBhODIjvDvj9I+fy7kbCGtF7GrBfPYtwUc8vXd3AIEdC5AEYXXXTRxZkgZ5Alt9yg6BH1sX5gfsHbNOdnriBQ7jVOvpRWqH72rHVYY3bGSytFNBqLkXSQrFFInN70hBffbmiYZYdddNFFF7NDIUECJcgZjytNxtiEA7iRpYqQTu2mubPMsi2AIGKz5LMCmOKmHeMtu3yxiy66OAeI2v6eIthbirVlRGGyq3imlMHJ7bbM60ICzMuatSrsTlmXRrFZqeNddNFFF3OIXEXtIBNOz5CauvfZQ0TqANXqRH47qyK5XYbZRRddnGNMlCDbMUWY7MyR2r3Ys4XjiKC4r61UPnMQsrJpi0lm+olDpfTE4Wo16cS6p6Gviy666GJuMZE1+mTD4/RcyFWsGcRzOpCWAKogHzGyjwATdPbg8QF06d2Vyv2fn75WRbc0WhdddHFuMclJAy3GM7lG4xSHSwp5QLa7W3uwT4t1easHkem1cqHVrWMi0XIXeY9Qa/LHtmOno+cnH801wydt6wa9d9HFjwgdVOxTOVya8N2W1YdE4wXi2YxH5BFERidm5u75/sVPDmAZIEsta/QC9YnHdex9GhrPHJ2YVbH9HDCsRG+6aaCvWg29k3+pVDanlcrzx//lMMr2eW2d08SVMP+lnOuPEdoz485Vptnk7LvTHSdxhbvJ04anw91nXm+hSV87XaeYl4kqdrsXe4oGOy7iWZWKVbJtu2HwfZlnG8VZPC1RCuLgbgMg/ePVfMaHLAZpfakI5gBxTOvHSUzwHGrY0zHHczXWU08tKZ8YyX4f918uwt5VwAwipfF0tbrkvUmS/EQzyZwBJkYClSo6NFRELly0FtjNll1Q1P+05vz/JJ9vF2eARGxqrYV2VIqaC8nE9ONT9lvUmWj2u2VXG9/bDbuHLO+bKf1Ob4OcUqpxIiOrVLAk+e2HIdl62WVLykuXTkfd8wCcGB78UAjRfzCrRyAzVBGapTR4jpjjbbdtiavVY+sybIUIRhaADIJHiB4DHprrMYeGxqK4HF6uIbrYLVMpXgiRBixr1EulenzKTn5skWilglarS/qvrty7LFTlNSby6gWLfJkg/Rw7rrB4FOG4kR1av97/6aGq7CXWw5VKcnxGR10Xs8Omb61A9l0OGXhQPv2tnfzOq/fOWf/JIxFLll2CPbsq3yCK6yj3f2c7d7z8xCmP37Ir5lhpGZEuxp5dCroAedl8JJQR78ElxTmJ7x0G389nnjuI7B0i8eP5+DMwysSVnzown/i5FaitI7rwSk74UpA+xFPcj7P0woPw3C42P/c0YfcBEj/R7HN6RuU+KS6yybgKKRVyzpwk9tRTjD711LQUKsC111nqba6Yyd7vZnvWPvEp9J09KpUkOjR8qC/WeXeKh7fnGToOLghR5GZPcg4Y5Lx5wTL31C2z3BSRM0jLR09H53rAHwKaUmC1urA3w25Q4ZYS4Ro3WyUiKqJ4YcMW0DyyIeBqtZLqARq+AwY/BTz+Iz2Rn2Q0JSd/7mpCuAejTKlkYB8C5oZBJolywZJBotIHSeVW8BSIEB2hkd4BfKHJJzof78rRby9nXvmjZI31CPNxi0GLpBAthCEDF0PCMCE6hNsOFu39Mg39exIfmZZJLn52HRq/DS29kbSxGhFFFEQUHBzDHUxSotJBTP+SZbs/1mSSE+MgRVpSZJP5TG5PqEp2ahWoZVcquivY38QCFq32KVleJ/rm0ATZM3aeQkCQCCd2J3aIEVVkJsn37CCtOyEPgZrgiPrJxBe/uKScuX
44aM/HwX8NfBU47hlmDSyr5x+r45ZinoEQ46zGeKuJLYcfrsnjXxaaaqUoqhEiMVEMOoPD9ExQ0lVIuJjcfFYGIkLUj+hNwKn5hKS9qCwDGaD5rIWIfBGWDDzL81OiHiWEftzW4PZOeno/TmQbedm+pR2rj21+9hqi8iZEfhv31WgUIZr32RiDtFgJQRVEIpxVGOsIvdOo2DBVahxvnzkXShL42rai+0nGw9MNE+pM31w7aQzM8WbON27F2+aHgJ9873zTrnre+endIfT8dpaNxTiKoHnWapvtuWi3NRRxQ+WAethd9Ne1RZ4NJrAOn7uKqYkra3dHHLN1pPXlxeJTxRgZmN/A//vcfN75yuHpO7kb5J2FFJfm6cRwgKzxNwj/E6eGiaLWh6SvxFmPllbgBo2xBcQ9v0Wj3s/CAx8i8aFxO+aSfZcS9XycrL4OMyOUFLLDGF/CfRduI0BMlr4c90twW8d5fQsYPvY1vvuq4dxZNNmL3ZTOxnmYTGqfBQwIs+lqMmMYyw+cvEs7fXMNV/WiMlBLqJbTZ+b/SrFlF9HCkfR3Qii/O01PxiIStU+d5Kq1tiWdGoKKY/nLCEXYWS8xVKkkUdcOORdwxl/ycyk/vhAW0Ft+HZmVUVXS9CuUoktxHyREqxitryfxvwdmthU26z3kmtROTD7KC684NuWY+7/TT73+a2j0XsxXkDViSvHtZNn/4MIDnyHxlEXfHsDlA5hdipmhoY5nW8jC3bzn5QemjJ24sujAcn7w4luw7AtTnTQT4iCZJtJnbpjDqXtpqdo5q+yZ0OrYyU+usNUBk+M8f7JQLOi2lhDdlqVjfcJEdU5EUxE9CLbHPT3miKlIHxIGUF2M23KgTJb+c2znDXdXtpwrTHSyzgkSMe57bjlZdmmxxRC/n6h0F5ktQAOkfhNUv0Jy/Wm85DwizSKuQ0naH+674bsrhlny/B+TvZQSlT5CI+1HrZcQ3sBIbQtUh5CfWUccX06jDhqBsJVG9hGGXnFw2kLgL6w4SCL/9+TNp1Gs4sxQVAxXhe+rBMuQIrB8qoMGwAUTFBEZcer5pJ6qNNo5oHvSALPeczycZdK24vuslZvJ/Z+q79kEn7diECfHJZ4+vdUqmrpfEcxX57p06zeRAOJfERu7B0r76uXGcM+YGMRlPOuzLBuUwKVo6UqX8Pj1679bb94/pzqHs6F5ch/5N0yOx5yu/5lspDPRM/m4TmOeaozZn2+bdjgXKnYzHCYK1yC6ODdLZUOkPEpmr8eya8hSRaPXMPiy5SR+4LTjIrdhU45JNirPL6mx8MBfo+k7CKXX5GdkawjxAi5ccZyxxsWk9aW4QVwe4eTI3zH0qoP58dPQMA3j7BzmM9lDfJYe4yRJ7NprP/Gwp/V3hKh86cyKtqu51zJPv9DosSPAYO5JnkRnRw/73KEps+aUztx/O5NKinbTNzXl+5QPcbOo8ERUq2iSJIz3P8n5Nf3DO3176kOXKLPstxOSJNEvPzHQW66Fi9ysb9zmSG6gcLNhj/QDgeN7Ad5wVf6oVquMAMe2b0/23XbbliePHv3eFqE80hw3/y5oSzoO3U7EeJhFqyrU7BaBa55ra15a85Mk01/D6embpRNz/LgZmanl3uDmhsljnQpzrJWMMxq/CRUgMpxvsqh+jO/V/wcS1fAsJu5dRnbychLZf0rypqDDGlOJ5PNwdOMQS57bQ6nnNaR1cPqwrJ8fSMw8/Rncy+ApwgjoPujAbDuez0RMVLHbvdhNJjQeG3l2TOjrX//9pyuVe/+NWe0t7lZkjDTvvxZt4sFcbU9w2f7El39vhJvfNJinNLbR1ZG+uUXrwW6Xb6dWLE+SRLfsWhsNHj0yuH7Dp1bLtvCaRwivuA4WQBY/4jricOhasn/m2vt2fPnL6QFg+HSlnaEh9KuP9i+9Juu5YSty5XUbfCnmPLJN9nuWfSPL0scrleRwXhkp77dS2bQiwy/11FJVVVOxrdsye+3rP7Xz9a
998UheZm7higy9/LrruQp0BdssAj3yCPbPlcq926vV3j1JktRnS2vISmURHURzb7XguIuJBpzs4Ne/dmRPMXPtqvN43xddtDtNkuRYs33ZZZt7zz+/foUZ860qputVATz69KEXLxh8ZvDobhsbmz9fe3rWbt2u16x3+XnB5rNBRrZW/cA1lU8+GNGzE5ITM9kyK5UkeuihRQPr19+76pFtevl118urcJaSe2VrW6scuZb0Wat86tFqNT5QqeT9VSr3l2H0cjMbaNJnKqbmCvcc2779vY91GqvOwou3bpPl11TMqIKuV0313oOPVe/aOXX/+8uZ1i6Rbb6Y9cWEVc2iikZZ+OTer3/t93af+so0X/fMnQ3yvj2X4H4NaUMRMdz/jtsvqrP52R2E6ABuq0nTAcRfxyef+wrHV00fjnMmj7Fbffx/kTpRGOWkKm5Riy+IgkzJUJstpqYaTpYUJ4f7nAWq1buOAPedar9WDF2HHzvSdy6NkNImQU50FiVJol/9av+yhfHRm116flHcLgcGkOZNEEAEcVdcUonCgbLKX1+74dN/Ua0e250kSZ0OaB9RALFQvmBwwVvUone523rRkN/iWkjiwm9GpWg7LL4HfusrkEuYW7dlG5Tojzx4DUHVzUTiUW003l+tLvxLM26UEL1PsHUQehGseY754pPRPhi9p1rt2wIc60DqjBhfkUhcPU9HXXbttYMXv+51Q8/kNHZUVydsmzcvW+we/YEIl6q4oYCLikd/0//9F38XLlhe6gn/HuRmcVla1CzNRxZXNfl3HvE3kl2wqVJJdnZikle94Y8HsrGxDaUe/SWMG9xYIKoTGEkeiqcaiR5w2Oos+KvLLttchXqvubwHid6q5PSpuEnQ2C3aWakkV7WPmSSJfvUbFwyW0ujDbtnNiqSIqASNStjDwE3ttFUqj0Rp2LU8ePRRd7+6SZO6mmsoq/EeYBYMsg1z5cVWuYFSOSIdM5BDYE8CUPf9SGMvImuwFOLyJdjoCrj7mbkZeCMs291PI1pNVoTqiB7ETx6j96U6dv4xJKQgkGXzwS7jwgMPkST1001TnL4e5GScczvfRJyWLekcO2m8k/yfJFqtXrA6RPGnIPrP4De4eb+54Vkzxq+BZ3XcU8AjsJUov68S3Zux4M1ffGpJOZfiOp9MMeWxpPZOJXwUZL27q2f1vN+sgWcNwMuOvxENH69U7nvNuBqdaU01KEgZJ0aIVUOs7ksz+A2Nev4Q/Grce90LWpv9muFuKyF8xCj/1k03fXL+bOIR43qtbm7H3a3wSkPLbCD9ov7Rr1YHr9iya+2kJYc7I4rE0JCiGmHEOLEEjZQwX+q22qV0r4j+O5ylbpm25iWPrQTvF5O3u0QfzbKB1ZP7r1TuXRzX7UMq0cfBf9VhgWOYNcav43if7ubmy8F/TSW+5/zz7feGFv70sKg+JSKG5/RhRSygyKpG44LBibdNYpr5MlFdKSqtawORO5dWKpsXTKRvm6mzGMIyEYnHx4AyeE1cpkioM6KIvT4rJIly/3f6gdcXy6AoIjtI64dJXHnx+SHcniCKR4EU95WIrJ05x7oN0wljSaLjtsK0VKHUs5YsNZAU9ypmx3j+sjruu4ii44hAWu8lKr2Z2tjVrL0tym2ns4+rzXecHObzI8aPX9zb1HmpVC9YnRE2icrNbul890wR0yYrLbJFtJ25upu6W+yZXy4e/vC8kcbNUyWacS++uhuOrBb0P7r7cstSLVxammcESB5bKK7uZu7Zmgzf+NBDixbkc+i1PI7eQUxx1KwRu8htKuH95o1lZinuZjjmbX2Cq3umjs8XLb3rByd1PcwmaPv7I0L2zyI6MjHeFXAzRG6MNHzugqGhjZXKp9aQd2rkJocpfTcaYybjBUscxNUtU7N0tbr/IcgVbhYVvNha8yKKgONq1oiRaL2WSu+f2HuirtHHReTd7tni/HwzBVcBXFAR1bbzUMSa46+QEH9w4dDQ73iWPSOqRx
AMseJ6ZIjo/FJJV7aGK87RwnJ3W+qeX5e2/QfNGmsLm2lrPlJdhtsCt2J/DNEA5nvghT0zX49JmCsnTb1+MaXyGiw1oEaWfoOFHM+LSVyfYjwOHMctIksHiEpXMbCvb+blpAtMJ4s1+cLi564h6vkAWTqAqqL6NHbyAY4+MAoYFu3A/BmcCDMQ1hJKH+NY/MbChpnHSs6Clok7zCgl/ngwz444x8JtK+snI0kSrVQ2rXDCx1R0vecXILeL5a/nVELphIjsNfc9IcRDImEiE/RMRWWxEG2+9nX3XXLyZKaTw2HGz0noBe/L/1VUo1SQnKG17SqCmmdpFHpeE+L0LUmSqKnXJ3QoqHtWBrnULFuGmZL3aaKKeMs+JCKIiLplkWe2LEjpjmp14eBkp087kiSxSgUT9+2CPi46yd6UF0lWz7I1IcT/u0v0j9dtuO/Prq3c9+bXfnXJsi1b1kaTmWSppOZNHWe80ImD+EoRvcIsNQRVVUSDFT/bhIQrcfWsHrn7r61ff+/VkOhll23uXV8Z/AOV8KtZNtYLFo2fN2IaolGVsB9nt4TosGioC0W/goJFWVbrDaXeD6Csc2cvIupe3C3uphppBs0QGBLy1Etcf8GzbAGeL4ZXVLMy1aAeqOQ25MSqVbRaXdiL+s+6Zf15VpxAca+4yN9Xq0n6Q800ShKF65RM14MMgqRE8X5UHmf32nSciVn9ScZGnyaKQQKIVuixaSs2FCgW4ZMyJZayaPEyNn1rBfftXcnmZ9fw2b03sOQ7mwjRf8fSy9EIgj6O1d/LnWt35IxPjLtW7SPLPkb5vL2okku5cimBv+Wz+/8rn917Awt3D0JVT8UoO8dBdsT0XChx1yLwfE6QnKtyTKeBiT5yz62CrrlDRl+8WQjXFA/nuKoooiaqO71R36QavknGaCb1derhXaJhvVsWk8cwqVlmqqV+Se0DIZTeZ3gqjk728I8nZmrY75buMOe4qi4vJKeBPPOkuZdHZo35SrjuoccW/XUkmRVse1IuRe52EpW6oI+aNQ4gUtYQXeKWXTJZzc+7tyvAlkFy5NRe4Rf3Zb7gc0HjNe4sds90vB6ooI5hWcMQ6ROJ3i6kb45i/+bCRcf/qlod+AJwqOmpbzTESrGk3kZ38yxwN5HIVGSve7bTzU5I0NWIrMOy/lawQ26nVonVqN8CyWPnnffpimjp7WluP8sZjjuCGnAo8+xz5tnfSxSOq9sKcf6tiLzV3fpaHmGP0sbYAkF/CU+HNET1jCxu7w+4qDlfCfDahs0v9ZTWuhvuaZt06nlMs8vP33LL5t4vfvH5WrWKXX2j9pbSsAo3xX2cRvdsGPWvz3wXT4OzYqcb4WX7FuPhKtJ6nKuxjd00xiZ6qe+6aIRNzz6I6M1kYyC6CgmXksie6SvxCGCgcjla2gyhmTgQgffhtpigfWQpwGG88RUyPs6RVROl6MSVIzzEon0fpjzvD2iMrSgkXSPSd5Lpmyj1PsqSpV9G9lQ5fGR/EfIwTbmzM1GxN26EJOETu04ul2dH3+S/IhHuhoQzn37PDAKf+NWxR39/Tc/TZ9zPHKAV4tPGpAQbPHpk0CX+JfD5tN9qriYiJ9wb/3HDhmOPNjfv2rX20JEXXzyo5veAXOHuxUPratYwDfE1sTQuMbfc09tWetidIutEdpqnH80auj2ObbQRxgaiLHqnavR+t6y/RbXg5mgUrQhZulhdzCfFIgKIYwh1N/usRX5P5DIE9ahhsiYS+SOQi/OiGQV7dVPQxYJeDDyZJFPDh5oowmSoVuVLnjUGRMNHRaI+LyQ9mhlJuRqf21CFPjeviMrlaPn69Rs+/alq9dhjlQo0GuDixaJtE9ITTTQC829CfaNQ3yk6r4bbYkPuFA3vxrK+1jUS3DMQW1epbF7gkv0i7oMTcyDERMOwe/qpejn77BNfPj5S/HCgUhnYax56VUu3uzVyVb4ZDKa6yiwbVbeaIHFz3twzcF9dqfzU/GolGSZJrF
TZNGDua5quxXH2KCi5mr36e99rLAP2QWKa3dcHvpKiDB5Cs97CHjLfe0axn2cjfiRibPrWKuKe1aR1I4pr1Eef4OjQMZKLWiXDAHTvw2SNEZBeNJSx7A3A508dD6n9aLSu+D9/EIpsXxr1lHweTiD+jwhD42M2+22mG76w6i9Z8u06qncRxVcDZRpjIKEfsVuReAORfpNFS/8W+/W/hOTI5MIas3fStIjPaSharqzE5f0CH0T0g4h/UNo+p9NG9QOi9gF3W3c6FJ17FGxSvJYSLnbzy3MnRpukpaqI/7Xasceq1evG4yIvumh3uviCC3YiPCAhGqG4PXMV1k1hIHO7HogmhDMB4KYhOu6SbQr0fimOXzherRwd/cbDJw6JN+7DssdEI9zb46QwdwZClg20r/Mz3qNDblPXrZbJPVE2dLBaPToK3x95fWXom5h/yt1TL9TUNptqZMgrZjNbuap9dHRkJPoTJ/tdYK+GWIubfeI5NhklmbpZn3t2q0rPPSkL3ghAb/uuzZNonoupB7sbjldh5ESlcnQUjh5Q5L+CPENbFXvH86ElLDUdW6caX+JmOm4eaaq41tiRxvqnN13ZZI5JEat5/DCBexxLc2bbJMrVzfpBBtzTWq5mA1DYFcNSiBZX8pU71Sxbi2XL3QxcwN3cyRMn3Ey1NKAlXdOkO8p8qbstd2tZs91NPfUdUDsx1ck3C5ypCJO4cv93yki4nLS+vAinOU4WHodKEaeZaDOPmedX78PZQVTKGZzZhsK5MzM8HSUdO0ha309aP0BaP0jWOIGIUe6NCAFCWM28+R/B5HMsfnbdxFqStOIan/+fX6KR3oll7ydLdxL1KFFJMQNPe0nTDcTzPkKJTWzad3F+bMtkMdFJMytPdfHMFXMgSorIqED+cUZo+0xoU7RpfSb9PuowKh3X3v7hYrKKXbzv64peJyrz80IWkjNJF3PLhh17II+N22btQc4PPLA7bbhvxX1IhOYDhLtoljV6Bb8cvJ/2cnCOiahmWX3Ig26tVr9br1aTwsaTWLX6vhMmfFk1dApk70uRPjWxKdIjmCg1cftiFA0drFQo+kvSJEksy6wqovtVWyFN7m6ImogOMkskSWK33PJ8bfsjd/1pGuQNZul/EtHdGnpG8WAgaev9InnxCnE1y2K37OJI40/Bomva+2wG0DuF9CiyY/vWux6qVpO0SX+lgp1/vu53T3eIaJ2mKNw80r2XNLrW8pTGCVCNMOVvH3voPUNF8HdxbP7/9q13PYbzpIQSTAjeFVWVsjsHRQPgzegzk1CanyKrxvcN4ToJIXYc1Qjwb6roweZS9OY+X+DSSmWccV+C+4LcOQOCpqLhmEn29Wrl+8OTVwSdHs2XPGcnQY6MDRDF16MaUeqBsZM7iE7sbDk/ig9AIinIA2SZkaVQ6lnOWHrD9J27FXRuh3Ataf3nSMd+lpPRzxHkZ2nUr4lUAr8AACAASURBVOXkS/8HIjuAlNEf9FMq3Uyp9//js/tvnVJkNxEjuT5l6JUHOLzyM8ThtaT1X6Y+9nlK8UE0GGZG/eR8gt5KpA+y6G2Xw8ZxJjnNu8QnqduT2y2IuYGnhtfBUnJ5tPPH2769rQ0pWNGWVPxUl3ASPefAf9SxSyNCfDWiJmBN+5yoIqqHTfwAdPbC+1jPQbf0cBFnaOMrO4orooOO9I+rn+MQBEZcs1pnlVYONetHTiyI45GgEaRtFq6m1wIDHcnwY3n17ok9RlGoC+SFSGWCGwiE0yrc25yHbzx858Ht1aGN4v4rno19VFQeEo0Oi2hK4RgaL3snglmmDstd+DCjcVSYGZjw2hJBjCPFSBPu48sue76myAtISPPzLc5B8nMQZRVu88enq/g2S8F9GtNOPoaITPrdEcFAyiqyF3dEirAmwRR6BVlRrWJr1xLltlyMgkE6uh2V/VLEznrWKLv5RbCkH8Al/KxoZDhWOHNURA+QsTe/dKeTauhn96wkYvREK/BsXe5gQl
GG8f71fGbPGyd8Fu99I5959k14I8ZtBFFDxBC/iS27TnEfSUqqdY6uHeWui0Z438tP8K5XHuLoXzzO0OGP4GPvIEv/BNE6acOwdDUiG1my7JKOITxNafKOl9c48ud/g/a9i3r9DtLGnxLFJ9AI6jXQsJhS+WMs3bOqGZI0UcX2JuMZt8xPbY+jzSvj1BCpC1ITpCZyZh+EGlBDfHoJshN959SLPSFPPHZncOJdVgwucjzKQsfAb0isp+fQMHBMVWkvC+wO4tILEkNhMyzGbf2djjKvNfdoUz+104RMYbyGTX64kiTRRqTmkp9H03c/V2+gavWF3SLH/ou4v8fTsd8F+WNURmj6porxRFDPUhC9JoR0DWitKfw0YwUACFNfpM30wsyzurTJSs1XiLur4QvcPPY2ppFL9lkaEXUMiG97kRwZZw5FzwV6Ef8ndxsZZ+aOmmW94K+47JYl5YGBwWU4a1pFkQ1RnkD0ADC+sJ1GpeVZyJYmSaK4r83PurjOKlia7g2hdPA0pr5F55nGQTbVV/cKyCCWKY0xQ/RWouiPCD2fm/iJ/yj/lN6PWx9uSqMGGl/B96KVM4fYOJTHtPOyC9uMw2v2kcUfAdtCFEd5LCSXIvqOZsjYVPrb7J53Lh3lhVXbKcfvx+obCeEQGnImKXI5pu/gwgMxietEFRumMsJTqN2ipDmDo+ZCzdXqLlZ3L75ltm3qAjXwus2kBHSi7xxGII0/jrnEGkkeqNuyXTVvXJd6o6EdCysAVKuYIB0YqBgaVCZyiVlh5uq92Sn3mA06BsmfEZqmgSStVF44uGHDi19qjI1+yN3vEuFA4T0eH89xVKLY1K91UqWI5/TCwTPZMz89/cW3FDpsXso8br2AJrhL0jRk07zkmpCxcRW6SamBO+UU9uCyVzQycTcH3LNYkRXn/yCdLxGXiJb6MENENEsbdXWextLv5jZJDMHcWCoNX/zEE6v6EFbiha3U3VTDCGL/dGYLuZ3FszLOYPQNSGFL1qBEpQFgGSJLO390MSGKgNzuV4oW4375zI4agU5l9NvV96MrhsjsHiwbHY+Qc7uVe3f1zZgt01L/jRUHRvDz/gRr3IOEEUQhrZcpla9mNFsGc/AEpSmIWj2gGJh625uh+aKcZdudVHBcT9MGOUfPcLWKVSpphER9orlHeFzykkLddclVhZz28ZqGDr2lkk3jUUy0Urkwdk72NVlqy/nh6m41F6nLhBqJZ4hxlTLMvN8s0KJzbkX05hxVKsnw0MJlWwaODcVBo4+5Wb9IW9FVHHHWgMduTRUcaIsBPRXG59llvOakC3VEwFrsMZckJY4yZszbdbfzRbStXsr4CGnJ5TBBtnor9lFxjBAPYukCsNeqKJm4iUQK2d5K5ej+rdsu2Ccan3DL+t1dRWxQRFaMjIwckuCL3VtXwtyPoZxe9kzz/Jrc8UxtkPfuvRT8NWSN3K5kthfP9mAetdJrOw3tA2i4FKxMo94P0ev4+D99ie+fGMkXy/r26dHRYq5P80f7dhNK64qCFSuQsJIkyVMaT/UCuf76lOQRWPgzX6As/waXDQgpqsvRxjIS2TdRxT6ddMKNG4tDPBWRmkNNoO5IzZGaS/E5jTbqNReti4fTu4RzJEHmapSWaa7SKC0lU3Nj4xFROdQ+Ty0Hji2uYx09dEkCjdLIgIsvNjOgXfoUHDuheYXjlq3wNJhS59PPOM3whNPs/9Q4VQBztZqkg0d3W+S6WzU6RFtgeZ6P7gAxPiGb5bTombCvkJfTcx8SpD6+zEfBdTVEajbVeVOcSxF9wEpErKm+53lNggjHwWrm2T+4pXVENF9SRUxF+qGxGPe1ZllhRwSQJ5MkMXU9KKJDCCaCOl520VeGYKtVS3mWkGOiQS2r71Orn17udfPkzxYRNxKXI/KMpRouG3n+lb+Enn8bPaXpP0HuIpSeyV9KppTii+ntWwnbjLMNoHbJFwVzz71sQeaf4ohJqBiMHaFeP4Bqmj/O3o
tob37Krb9nhsjNTWuKmEEuR07Rfjrxu6nPjpF7XSU79xLkxLp/UKmgSZKk69dvWolk42EW446/nA8edOGo5OEhxc+Cu6mIDqpwCbBzciB1ksD6DaxRiRabp4wvN5BXuUnF0n2GRHqGrOicmmDPoP9OZdSa8zxRwk40l9qzMnh5siMwd1n5CYR+0dzHebr0tDQANHegaOruB1TCCcda0qKTB4wrVyVJ8qVOmkClcm+fua+T9vvZx42jB8BHXMMeNfYDa8wzlTy4e74RLhVhZV60Q3C31Mi+AZAGORwsPYSzGjBRAdFV7vYDFaWotI5IhEj69Wr1fSfOrIiwnNnNkiTKsn/fT+Pk68kaoAFE9yAndwDw/JJa5wML5jfwjv301J9Gw7p8jRlbidvFcN0cxDrnWWb5v2ago62c71nWg4t+2vAf1HKeZNY+SR1Y48RMjqntAm2MXyH1fGU6y4qU2BwtBaa1TSe1WxARyzNWbAYJshN9p4/JD0ClklCpJLr1Eb9LVPvNsjw+zwsmaKkiPEua7XMNI7j0uuQ5u7ntSGNxfxvwp8UImveLwoVRaiOvV2WBu1vTGC+CqZaGU8+eELefZ8JbY/bnNc0V4mwtKGf2LCVarS5a7mK3O/5MpXL/1mr1jmm88HDllQN9mcstkqYrEJ9EsIDotwS5zJuhQPlmbb+zZsbE2VEJqWm6C5FDIEvHexHUrAGU3vjwwwvur1SS/fnSxq2eTLhRJVpheXC7FhRansrOznovwyHzuro+jdvaptfZ3frEea2jA4ghqoAcDsiTAFHmQ+bZXtFSxTyFzFXUVpl5LJKNu/TMGmTIGdZXPxsv9kZo7LuEnvJqxk6ChgjsSYLlDq0Z6ywmyvFVIyx69h+Ie9/C2EvzcesnlK/ip1Z8gUsPjHB62eQth9GSvQO4ryJLc6btNkw9O3L65/eDXlwGsbQo2yajICMwOdVwfIXA5k0jrfY0T4umpRTSmqOWhzugrcfcaQmUxcbJAmZ72y0X1CSawYvdib7ZY+3aJB4cXHS1iS/1NN3nrieiKMRbt/pKUb9DVG81y3TcvuS5ucXhYObp0yX1Iy6lRxG/Ec8lcgTFUtMQ3bi+cu//1hjr+X96eg4VMWoLyyYnbw3S83bL0phchcpVJtHIspMHAjxs8PNeLHrkM7C8TpjgZsgdSLTbICevHHk6aB07OyRJYus33Ls60vPuzGxsmVntmfWVz2zH7B9V2Z8GhqJMLAvSGzJfaeLvwv1N7lY4UYq5QcnS2qiKPezwC+30nO55tJ+/4+oi+ywd+6ZoWGd56FbO7NxNlLUhkg/Coru3bHnhcJKQVqsXxnnNR/+ISRp5U5b1XMbVEO03sr+76crjI7t2ra0NHRv6Bwi34pTzQPJ0PrABsd7WlZKdwJE8E+aukfXXf/op1WjY0rQ/L4jhqwVZbtbIox60hFu2uyRHnzytk++E5vM203KsTSSee5Nl6XqcBagaGp2g0djG80PD8MDMYyWJkWxULNpO/eRhRPoRNczWMy9dyrZte1j0zkkHzeKhXvJ8GdffptSzgEbNiGIwHuPFVUdy73el5c2eaclZqkr2skvp6bmYRj1Pa/TsAMYhEtepSy6cUT1IrUsza2Py8ZM16RnahhgK0YTg3kk4i3qQuXTzU72m4VfE7TcJ0Ql1GTUhQhlAQtkss0lDGGAisr3k8QGIR8xH/0IlrMN1QdOp4DmTBJcPx3Hj1akt3HbttYxmLlep6O2epUvBtWlbaxaeyCz9XP1kOtRT1gjBcLS9HuRsMZVlZMW8hDNijNB8lGdPS5IkumULkWSsymx00N0jCdGlAusMUhOGg8mwo6mYlc19UDXEmRW1KNqcHqKKW/b5RoPDUezllg9b8NNw0sCkF4N7/gIJ/ldCuFHUV7lleYiNoG5ZJITbHR+8YHDwi1+r+rGgtVWWydtEdY2bjWsADiaqdcuyh+aVSzvzEKPd6QvbFz0j6BHwFYVwoUBuG3Mxx8zddo
6OlIab8/a17faMWXZCkCKHXGKYGHcqKtXqI8k06uypZ2EqNkIyUzTARqCqLBlcisZXktbLedSF7CewO2dC15/aX5CIkTxygMVLHyOetzZP99OVqFxBkuxm0+3ka08V8OKZvo4iYHsjucpaqM6Lvr0Az94KelcRagRuJzC7H6rK4LLL0W/3k922k7suOjI1pKjoKxHj3r2XEOR3SRurwYxo3ijpS9tYYIcY6iRBTodpHDgaxtLM4xqSV0M5mzx4AcMhUzk9G+RpPC31uBzHKQs89zAOoDIghSrtZHnwdrPb3GZlInoos/pfBV48AZDFi/5eG/yChNJveFYvN1W+/CR8vov8RkDfCpK6WX9epqrlnRUXE1V1S78QGPt8Z4/zGbpG5Ix9lB26On0MDv5Ur6Gvxr0XUMtSy/3FROLaj0o/4uNOmMzSybdWKqqK2ZMe/F5ixnn9mUnAHc6jAcdeHHx84cKhTaLh4+QRNCYi6oJC1gv6JhWtAKPu3gfEZqZ5EXsHxDSUEOdxs9q9Dz74nuMA1eojkbL7oIscQFg5ZXwRUwnHzPyfb7nl+RrkNuqr3pDuK9X0gGi0sjBUNZlwbj7FasC2fP8zWXvHARRLI5yL2LT3ZngO/Fe1df81K+Y3289C9DLDWIPIxUVoD2SN3YTy1NUBZ0Jyfcpn9j6IZe/GHUKIsfQm4E8mO+EQYsT72D04zIW/njK6OyJ6Wxn2LiCTdZTC67HoTbgtAIworuPp54nqW7lwRR+mb0PCrdT9m2za8yD+rd2kpUMMMMxL56WE28qk+xZz395LifRdIFdjmVEqK86TpKUt7H5FSlIwtdmZqjo/sHWLLcJriMbkthhMMHVTkyh32bppvq1gPqKFimJKsX+zPwXIZggU74RZPjdJkthrX7u5TMziwnsMnqdw5fbrdkkjV/5D6BnNvPG5gD7ctpzB0A03fOIPGo3yAo3i2y2tNyWaXDV3U3fpQ9wQz+v3FZKPoIiqmttXAvLhavX7w5XKwl6bUUL/yUA+v5+YX4rDxS5mZm0vnPwFpLl0MEntzf/Ns0tCrJ6lzxD8w4svGHzm8IkXFnQebXbocGtYCKndfvvu9IknBv7kpZPyStHwW+T1N1NBiqfBcJMyeWFammuku+dZPSGU1PG9Da+//xtfP76nybSq1W122WVLDp/Xlz4jGq5xyyLaXroI6iIHVdnfnDOAN1yVnPhadeGOoGFDXui3FWCV2yzZL954uv2Y00I+x0paLxNKt1OK3zTrl3CWlUkb/eBQikcYe+kJDi87cdqLcIlvJ02PoNFg7qxhPZv2DY4vP49ofhvI5YSwGWSYWqNOiCKM+USlBZRKg2SNATzLmWpcTmmMfYGGf5yja0+waM9yovJrEF+KyFuJz9uAZ8fRxnFG/BiM1ElLfYQwSFxaSv1kwWR7FPchxkY/xNE1+5vnNlHgG1dX2yeu2e7MhcolTOCkZz7q4qPuPiomNXcZFfOamNda2/Lf3bzmxfb8t3w/cR91l9FsxjjITvTNHqVSvdexQciZFS4mxSdPe5O0CKlINcRDDat/eNEFA/8lL4TQujGvuebEIZEjv25p/ZOi4VirTmOzVqNT2NVM0BTHVCOTEB9yz/6vQPquavU9z7Q7AYq0RcPF2p+pjkGzraMoDMtN+ovtgbT15kvHf5dgrRTCTjjJeICqF7RIUQl4Fo9DVupRkFS1NKIarIitMRFJBTWcPG3O1fJ2HjKjoZRq6DnmWf2PLbLbtq8/+vBFF+1uuw/yfvL9i3Oc1eOpNK9JM60xyyIFuPLK4yPnzcs+hGXvFaI9QeNiPClSIL2Nkef0qqppKJ2wrLElqzdu+Ub1xR2txcEAEnvqqedruD2hWjohzb5a18c8G9sD9XEJrOn1D/A1MwMN7fsX9gd/cmysMTQ5rXLWEPL7BAHL+qifXEy9NrtPkzlqgLQxhPmjpx2ek7hy56uOoeEhQpQ7Yks9g3h6I9Rb9ImmqPQTQoWo52ZKpb
cQ4lsJ0QbMLqZRGwSUuHcUZD+1l95Pze7k6CtypqZaJkQpUZybIhq1ftJ0JSJXEKI3EUpvRsONWHYJjbEBRCGeN4LZwzTGfpGjax5vJ7tDPcjJjHBm8axu5BWfFdP8T4H266gdtnVoN3OwZ7JBdqLvtKSvKBL0sKiWTaQPtzJ54QkDqSMyjPsQlu0Usb94tPrbDwM8MMkWXTwQtUrl/g+kfvKL6nabhJ5LgWW49UlegFVB6yI6jNgRS9OnTep/dnxo0WO33747bYZqnH9+ZN//QXZYNX7aMFQL35UEGo2TB0qlUsfsjgaMlDXeIRN0VDFERyRNR4AR1Z4draI2CrghOuI6Ntxxek6GNJSj/aj0mQYTXB1MpaSucqjt3Dvi8eoLB6+5ZvBOVasgvFajaK0QBtyZD152L7SWfC2WuiDH3bMhz+o7UR5UOfbQhmuxR5PEEhK9+sYoVQ0HBN1pmk2gJ5NakW43MaQqSUA0OhZC/DRCLG03mkjpsPjJ0eYSq0mSjFSrfLbuCx8LJreFKGxwD0vzXG0rjpVUJIwAx9zGnvEs+++qjYe2P/q+E52X+YVqlR0i4fEQlZY1tzuYalxv1EYeqX69FarTCpy/d6e7PR6intjVinPNXyBpdvJrPT3DwzOVmpsWlg0T9T4DVj4jI5ijBUNTRr/3GPN69p7u2i7jCPwVIaxFepSe82Cs9mpMHqdU3oPQh3kZiPHm85NnF0GooTJKo3GcNN2PNZ5ArMp7Xr13Qmrh86v3snTPHWR6IyLXEc9bBT6AWR9mEZiimiLRKBKOU39pH7XRv0PCF3jPq4YmO67yJ+uze2+g1LuZdGw5WTadwp3r6I3aX/Kq//W2ZFvFkkTs4986uQLxN6vPQV5b4eixzKvvW3teHmN1775V9ER/i9uaYvW0Dge6EfVAlj3N83922UwXr1K5v5yFk6s9s+UqMmDIAnWPwVLxMOyeHVHVg8C+SuXo6GzVmZtu+uT8kZFohUS+SmCxYX3iquJ+3NWPqLf6hElMJkn0tV/tX1YqlQbaOWFQVxdGouzY/k6LTV150yfnxyO6KgstVScGsiAWsrGDJ08Gi+Ppf69W33dicp+33bYlfv740Apx+jJrHRfU1cZKx77xjTtPmQPcZBqVyr19WQjLQ9YYNNEBy7yfQF4d3RkVYVjdh0APQe+havWOGsWSuW3ZNhEsXJGpz59MTzAZrlbv2teJhqtv3DQY123p1DeLpmPn6/6nvnjnuFzelOB27VobHTl+fJVYusKdpYL3g0YOI2I+BHJo3ryePQ8++JvHTzUHt922JT569IWVmUpvO90A3jN28B8e/A8d+kj06spPrw1ZiJvX7FTXa1b4410D1MMymqnFTWGoUXzP1G7/PxJljCF+75WHzogOgHt39SHzVhIKPpPKML3hEA1bTqO+gCjqwzxGPcI9ArW8iogWoTc+hDeGOLo2v36d1PymY2fZoX7Sl1biuhjxAdA+3CPUR3E5TqZH0Jf28Z6fG5qO3JzbbNqzgZ6+zaS1FTmX7Yj8DdKo/w090duS766oJ4nYJ58bXeaZ3+yEGMfOyktjBqpIJtX3ru3J04U2P7sGjf8WfNW0DNLdKPWAZzt41yt+YeoOE9G+/nG+ZOtLOjT0Xbv9dtL2dZFP19bTYgxJBBcW8/jdZimufK3safucSXWa/phKBW0vedUsk9XcNt3veYzf6fU78zEdeimqgrevTz15/NYa3zP1e/r05BELE49p+3WasI8Wc06SRHftIjp69EJtv4ZF37Ocg6nX9NTzOPGY2V2vU5Exi3VgZoWqwjY7Y+lxCj3NcJxpajlOe9wM+0zYv2CUrf4Vqkwc8+4ZUxJzbrP52Wso9W6mMbYan4FBaqRY+ijiv8Tzq4+TiG1+1hec9Nobxa0X1bP0oBpmmhJk+/f//P88kCSJsenZKwjRF4EFZOn0EmRpHmTpdt698vrZj9fK8ICm6jIXC4ZN7vfHbRGyHxXaM2pgbub63G
FittWPN61dzAKniovsACFxZelzl1Cat5n62OXj3qGOfhkB1b1kY7/MC6/eTSJ27y7vS8NL17iEQU5Zx/HUUPfR1OZVhx/gRJKIsXnv2xG9H/N4gkNmAn1uxL2QNv6ad6+8bVYBsF100UUXp0CzWMUwaTact8fTuXJMKExrRqmnHymtgbtJ3PXoEDVTjoh7TfC647Uz/Yh4aipDw0O0ORDCL6AhHndZji9X10afA5aBUtjHZrn+bhdddNHFDMgZZNw4QTZ2pChZNFHymqzSZul84Cou/PU4AZLrJY0bHBHXE47XBK1LpnWh7XPKttcFr5tRH3Pbz7a7cxru/04ZYUPhYe6cqSPFtiyFzJ6d+ynqoosu/rUiZ5CH1p7A2UUUj+YS2jRhMyJKlsbEPeupp2uboVBHh847JioH1b2mntZUqam3fU7ZDjXB63h04OSreo/AxrwOx8n6G9FwMWld8WncP05RXUSOIeSOnblcg7aLLrr4V4vWUonC0+CdY+Pa4Q5ZuhbRm1m4u5ck0eR6SV+M4wOWlo5khLq518y9ZqH4tP/f3m7bniHHYi/tTUQsgTzfslS6sxhzyuJTEyGgYTcuh7r2xy666GKu0JLKgj5NOnaIEGkH70wbXHEvA/8WDVfkbnTX5OVSmzcW71NPjyleV3wio/S2Txtz1NTrkqbH5WR939G1jJK4suSpMpK9EwmvIa3TvnznFIgYuGHZDsbsBFw3RyENXXTRxb92FG5vMf7XoSNktpWoB5gpk4XcIQIr///27ifEruoO4Pj3d869972ZvsQYnTCRYEIYUpmFRBoGXdVAd13ZVpe1QWiKWVYLUkrvUIrYLooUq6YuFARtCy5aKaWbDLRKrS66KLY0dkwlZpKZMB3j+ObNfef+jov73sub/2/GSSPl94FhOMx973Bn8eOce3/n98P5H7L/vapgZR7d6RPS/O++xrRGuaROm1LGIJIUErQQ6fsJWlR/06IUuVxvNqY/Or7vWt7dGWvjXlz2CGW7AVvkcImAS66i5RvMjy2Sn7zpLWONMf8fVi4Vf/HPu3H+LYQM7ZSFiquu7tWHFCWtKaF4lVA8ztzs1W4CZh6jOzhDPSx/spdm0mg5XHSFYxnqaaaFoknQlk+GFubGaeYiSn4ugfuVQ++fILpniXo3ZTtZVeVj1ePRCN4r4v9AaJ3hyl0fbPsAvTHGbGDtXvr5f7+C9w91muC4zXfbUcnqBWX7t8TiKW6Nf+fd8dAfpPJzMeEIyUhzLoER5marPtj5SQnXM+MnYeTBYZyfIKs/g8a7KNsbTLpq/trwAq3mE8wee2GrrHhjjNmO6+Gv+3Lj7L++giQvEXWUUjcPkFW2tuLTgJbvoPpL2vIa82OLOZOdjhAb5CT2H/85cP5OvDyE84+AHKVsb/0cMaIkCSBTEB7mw7FLtno0xuymleEvzx2HH95LO/wY5Nuods4vbkkRgbQ2S2vpjzh+Ra35JqfuWVj3HGg3kD3z/ii++Bo++zqRE8Sy0TvJM8iczjtUH+Ty2GsrvtcYY3bB2kiUR8fBfxwn3fNzQjGBbljdp09nJQmQZAqySFieBvkLTt6mHS+RyiKxdJRxP94fBb5EZILa0CHay/XqxU/cOjjG7vPPuqLlr/mweQpWbuuNMWY3rB8gc1GeO/8NstrPCMVoFSQHLNsdY7Wa9KnDewgBNFR9dKvVaB2fgnMQ2lAG3TSNZ+0EikuA+FdieYqZV3Zem84YYzax/vY3jw75wu9pffIsiEOcDlyUVsQRoyMUyvKSom065wHrIBkxQnsZlpd08ODYPd0TOw165AKqP2UmTG/jXo0xZls2Xhbm0XHLhb0Mhadx8k1Uldh5ntjrM9qp5r3huG+K6+lBdBqUDPD5vjFU5eLTbJ6y/AHt1svMjTdta22MuVE2Xr3lonx05Bqe76O8iEsCzmkv6PWauMsm41U5jL1CE4N+vvsVUq0c01qL0H6C1L3I3G8sOBpjbqitHyzm0THy7g
F88jhJ7Vto2IeuetPcW+XJjRgr3iuRi8T4JKfHzu74bo0xZhu2fv6XizI3PovwJGUxSZJdxGdVWbQYtfNWmV7zrN0aRxSRquct7k20/C4Mv3xD/xvGGNNnsLfHuSgzx+bJ0rOE9hkiUyRZwCeuU0OyIn1b452Pq+CbZHRSh14gLJ1hf/t1Zg62dnSXxhizA37gK6cmI/fcqnz8wHka8+dQvQJ6lNrQHlQFYlldGGVNy4beKrFroz7bUqXwJGmLMryDxu8RWs8xO36JuRG1Z47GmP+lwQMkwNRU5H4RFh+4xmO3vcFXH/0dZXsJn9ZIa/Wqx7QH5yIinf1ylPWDo4A4xbkqenrfojZ0haL1JzT8BIk/4jvH3mbiQCA/qUxNbqf5tTHGfGYDZn+vo9eshxRnXwAAALtJREFU+8uOO0aPojIBch/p8HGkPEQobyfGYbzXNdNEdagqIk18chHVC4Tib0TewvNnTn/xam8OSwI3xtwkOw+QcD2Adc9b73+vQcYhXLyDUu9E/GHSZBTxDaJmAGhs4uICoZyB+AGlTEOcxV+7zMzrrV4fW2OMuck+W4Bcrb8Rd34u4fCRhI9Dxp7EsdC5xgfFF8rwcOA/RwK5hF4tSAuMxpjPkd0NkP16W3BYWfJssjPu/LagaIz5nPoUBSp4D1AF9yMAAAAASUVORK5CYII=)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "3o5sAOfwL5qd" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/llm_notebooks/Fewshot_QA_Notebook.ipynb)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WJJzt3RWhEc6" + }, + "source": [ + "**LangTest** is an open-source python library designed to help developers deliver safe and effective Natural Language Processing (NLP) models. Whether you are using **John Snow Labs, Hugging Face, Spacy** models or **OpenAI, Cohere, AI21, Hugging Face Inference API and Azure-OpenAI** based LLMs, it has got you covered. You can test any Named Entity Recognition (NER), Text Classification, fill-mask, Translation model using the library. We also support testing LLMS for Question-Answering, Summarization and text-generation tasks on benchmark datasets. The library supports 60+ out of the box tests. 
For a complete list of supported test categories, please refer to the [documentation](http://langtest.org/docs/pages/docs/test_categories).\n", + "\n", + "Metrics are calculated by comparing the model's extractions in the original list of sentences against the extractions carried out in the noisy list of sentences. The original annotated labels are not used at any point, we are simply comparing the model against itself in two settings." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "26qXWhCYhHAt" + }, + "source": [ + "# Getting started with LangTest " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install \"langtest[evaluate,openai,transformers]\" " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yR6kjOaiheKN" + }, + "source": [ + "# Harness and Its Parameters\n", + "\n", + "The Harness class is a testing class for Natural Language Processing (NLP) models. It evaluates the performance of an NLP model on a given task using test data and generates a report with test results. Harness can be imported from the LangTest library in the following way." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "lTzSJpMlhgq5" + }, + "outputs": [], + "source": [ + "#Import Harness from the LangTest library\n", + "from langtest import Harness" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "sBcZjwJBhkOw" + }, + "source": [ + "It imports the Harness class from within the module, that is designed to provide a blueprint or framework for conducting NLP testing, and that instances of the Harness class can be customized or configured for different testing scenarios or environments.\n", + "\n", + "Here is a list of the different parameters that can be passed to the Harness function:\n", + "\n", + "
\n", + "\n", + "\n", + "| Parameter | Description | \n", + "| - | - | \n", + "|**task** |Task for which the model is to be evaluated (question-answering or summarization)|\n", + "| **model** | Specifies the model(s) to be evaluated. This parameter can be provided as either a dictionary or a list of dictionaries. Each dictionary should contain the following keys:
  • model (mandatory): \tPipelineModel or path to a saved model or pretrained pipeline/model from hub.
  • hub (mandatory): Hub (library) to use in back-end for loading model from public models hub or from path
|\n", + "| **data** | The data to be used for evaluation. A dictionary providing flexibility and options for data sources. It should include the following keys:
  • data_source (mandatory): The source of the data.
  • subset (optional): The subset of the data.
  • feature_column (optional): The column containing the features.
  • target_column (optional): The column containing the target labels.
  • split (optional): The data split to be used.
  • source (optional): Set to 'huggingface' when loading Hugging Face dataset.
|\n", + "| **config** | Configuration for the tests to be performed, specified in the form of a YAML file. |\n", + "\n", + "
\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "JFhJ9CcbsKqN" + }, + "source": [ + "# OpenAI Model Testing For Question Answering\n", + "\n", + "In this section, we dive into testing of OpenAI models in Question Answering task.\n", + "\n", + "LangTest supports robustness tests for LLM testing for now." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "kKgXC7cvuyar" + }, + "source": [ + "### Set environment for OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "MHqlSjFLuy7o" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\" #Replace with your OpenAI API Key" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BoolQ-test-tiny dataset testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The YAML content defines a task named \"BoolQ\" that specifies how an intelligent bot should respond to queries. The task instructions dictate that the bot must provide a concise answer of either \"true\" or \"false.\" The `prompt_type` is set to \"instruct,\" indicating that the bot should execute the task based on direct commands rather than engaging in conversational interaction.\n", + "\n", + "The YAML also includes examples to illustrate how the bot should handle specific questions. Each example contains a \"context\" that provides background information and a \"question\" that the bot needs to answer with either \"true\" or \"false.\" In the provided examples:\n", + "1. The context discusses the renewal of the series \"The Good Fight\" for a third season, and the question asks whether there is a third series of \"The Good Fight,\" to which the bot correctly responds \"True.\"\n", + "2. 
The context mentions the cancellation of \"Lost in Space\" at the end of season 3 without resolving the story, and the question asks whether the Robinsons ever returned to Earth, to which the bot incorrectly responds \"True,\" presumably due to the bot misunderstanding or misinterpreting the context." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "yaml_content = \"\"\"\n", + "prompt_config:\n", + " \"BoolQ\":\n", + " instructions: \"You are an intelligent bot and it is your responsibility to make sure to give a concise answer. Answer should be `true` or `false`.\"\n", + " prompt_type: \"instruct\" # instruct for completion and chat for conversation(chat models)\n", + " examples:\n", + " - user:\n", + " context: \"The Good Fight -- A second 13-episode season premiered on March 4, 2018. On May 2, 2018, the series was renewed for a third season.\"\n", + " question: \"is there a third series of the good fight?\"\n", + " ai:\n", + " answer: \"True\"\n", + " - user:\n", + " context: \"Lost in Space -- The fate of the castaways is never resolved, as the series was unexpectedly canceled at the end of season 3.\"\n", + " question: \"did the robinsons ever get back to earth\"\n", + " ai:\n", + " answer: \"True\"\n", + " \"NQ-open\":\n", + " instructions: \"You are an intelligent bot and it is your responsibility to make sure to give a short concise answer.\"\n", + " prompt_type: \"instruct\" # completion\n", + " examples:\n", + " - user:\n", + " question: \"where does the electron come from in beta decay?\"\n", + " ai:\n", + " answer: \"an atomic nucleus\"\n", + " - user:\n", + " question: \"who wrote you're a grand ol flag?\"\n", + " ai:\n", + " answer: \"George M. 
Cohan\"\n", + "\n", + "tests:\n", + " defaults:\n", + " min_pass_rate: 0.8\n", + " robustness:\n", + " uppercase:\n", + " min_pass_rate: 0.8\n", + " add_typo:\n", + " min_pass_rate: 0.8\n", + "\"\"\"\n", + "\n", + "with open(\"config.yaml\", \"w\") as f:\n", + " f.write(yaml_content)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "swaYPW-wPlku" + }, + "source": [ + "### Setup and Configure Harness" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_5nO14bvTzt", + "outputId": "cee6c5f4-6f32-4f72-e9db-440a410b59c7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Configuration : \n", + " {\n", + " \"prompt_config\": {\n", + " \"BoolQ\": {\n", + " \"instructions\": \"You are an intelligent bot and it is your responsibility to make sure to give a concise answer. Answer should be `true` or `false`.\",\n", + " \"prompt_type\": \"instruct\",\n", + " \"examples\": [\n", + " {\n", + " \"user\": {\n", + " \"context\": \"The Good Fight -- A second 13-episode season premiered on March 4, 2018. 
On May 2, 2018, the series was renewed for a third season.\",\n", + " \"question\": \"is there a third series of the good fight?\"\n", + " },\n", + " \"ai\": {\n", + " \"answer\": \"True\"\n", + " }\n", + " },\n", + " {\n", + " \"user\": {\n", + " \"context\": \"Lost in Space -- The fate of the castaways is never resolved, as the series was unexpectedly canceled at the end of season 3.\",\n", + " \"question\": \"did the robinsons ever get back to earth\"\n", + " },\n", + " \"ai\": {\n", + " \"answer\": \"True\"\n", + " }\n", + " }\n", + " ]\n", + " },\n", + " \"NQ-open\": {\n", + " \"instructions\": \"You are an intelligent bot and it is your responsibility to make sure to give a short concise answer.\",\n", + " \"prompt_type\": \"instruct\",\n", + " \"examples\": [\n", + " {\n", + " \"user\": {\n", + " \"question\": \"where does the electron come from in beta decay?\"\n", + " },\n", + " \"ai\": {\n", + " \"answer\": \"an atomic nucleus\"\n", + " }\n", + " },\n", + " {\n", + " \"user\": {\n", + " \"question\": \"who wrote you're a grand ol flag?\"\n", + " },\n", + " \"ai\": {\n", + " \"answer\": \"George M. 
Cohan\"\n", + " }\n", + " }\n", + " ]\n", + " }\n", + " },\n", + " \"tests\": {\n", + " \"defaults\": {\n", + " \"min_pass_rate\": 0.8\n", + " },\n", + " \"robustness\": {\n", + " \"uppercase\": {\n", + " \"min_pass_rate\": 0.8\n", + " },\n", + " \"add_typo\": {\n", + " \"min_pass_rate\": 0.8\n", + " }\n", + " }\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "harness = Harness(\n", + " task=\"question-answering\", \n", + " model={\"model\": \"gpt-3.5-turbo-instruct\",\"hub\":\"openai\"}, \n", + " data=[{\"data_source\" :\"BoolQ\",\n", + " \"split\":\"test-tiny\"},\n", + " {\"data_source\" :\"NQ-open\",\n", + " \"split\":\"test-tiny\"}],\n", + " config=\"config.yaml\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "jWPAw9q0PwD1" + }, + "source": [ + "We have specified task as QA, hub as OpenAI and model as GPT-3.5.\n", + "\n", + "For dataset we used `BoolQ` dataset and `test-tiny` split which includes 50 samples. Other available datasets are: [Benchmark Datasets](https://langtest.org/docs/pages/docs/data#question-answering)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For tests we used lowercase and uppercase. 
Other available robustness tests for QA task are:\n", + "* `add_context`\n", + "* `add_contraction`\n", + "* `add_punctuation`\n", + "* `add_typo`\n", + "* `add_ocr_typo`\n", + "* `american_to_british`\n", + "* `british_to_american`\n", + "* `lowercase`\n", + "* `strip_punctuation`\n", + "* `titlecase`\n", + "* `uppercase`\n", + "* `number_to_word`\n", + "* `add_abbreviation`\n", + "* `add_speech_to_text_typo`\n", + "* `add_slangs`\n", + "* `dyslexia_word_swap`\n", + "* `multiple_perturbations`\n", + "* `adjective_synonym_swap`\n", + "* `adjective_antonym_swap`\n", + "* `strip_all_punctuation`" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Available Bias tests for QA task are:\n", + "\n", + "* `replace_to_male_pronouns`\n", + "* `replace_to_female_pronouns`\n", + "* `replace_to_neutral_pronouns`\n", + "* `replace_to_high_income_country`\n", + "* `replace_to_low_income_country`\n", + "* `replace_to_upper_middle_income_country`\n", + "* `replace_to_lower_middle_income_country`\n", + "* `replace_to_white_firstnames`\n", + "* `replace_to_black_firstnames`\n", + "* `replace_to_hispanic_firstnames`\n", + "* `replace_to_asian_firstnames`\n", + "* `replace_to_white_lastnames`\n", + "* `replace_to_sikh_names`\n", + "* `replace_to_christian_names`\n", + "* `replace_to_hindu_names`\n", + "* `replace_to_muslim_names`\n", + "* `replace_to_inter_racial_lastnames`\n", + "* `replace_to_native_american_lastnames`\n", + "* `replace_to_asian_lastnames`\n", + "* `replace_to_hispanic_lastnames`\n", + "* `replace_to_black_lastnames`\n", + "* `replace_to_parsi_names`\n", + "* `replace_to_jain_names`\n", + "* `replace_to_buddhist_names`\n", + "\n", + "Available Representation tests for QA task are:\n", + "\n", + "* `min_gender_representation_count`\n", + "* `min_ethnicity_name_representation_count`\n", + "* `min_religion_name_representation_count`\n", + "* `min_country_economic_representation_count`\n", + "* 
`min_gender_representation_proportion`\n", + "* `min_ethnicity_name_representation_proportion`\n", + "* `min_religion_name_representation_proportion`\n", + "* `min_country_economic_representation_proportion`\n", + "\n", + "\n", + "Available Accuracy tests for QA task are:\n", + "\n", + "* `min_exact_match_score`\n", + "* `min_bleu_score`\n", + "* `min_rouge1_score`\n", + "* `min_rouge2_score`\n", + "* `min_rougeL_score`\n", + "* `min_rougeLsum_score`\n", + "\n", + "\n", + "Available Fairness tests for QA task are:\n", + "\n", + "* `max_gender_rouge1_score`\n", + "* `max_gender_rouge2_score`\n", + "* `max_gender_rougeL_score`\n", + "* `max_gender_rougeLsum_score`\n", + "* `min_gender_rouge1_score`\n", + "* `min_gender_rouge2_score`\n", + "* `min_gender_rougeL_score`\n", + "* `min_gender_rougeLsum_score`" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also set prompts and other model parameters in config. Possible parameters are:\n", + "* `user_prompt:` Prompt to be given to the model.\n", + "* `temperature:` Temperature of the model.\n", + "* `max_tokens:` Maximum number of output tokens allowed for model." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ZPU46A7WigFr" + }, + "source": [ + "Here we have configured the harness to perform two robustness tests (uppercase and add_typo) and defined the minimum pass rate for each test."
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "➤ You can adjust the level of transformation in the sentence by using the \"`prob`\" parameter, which controls the proportion of words to be changed during robustness tests.\n", + "\n", + "➤ **NOTE** : \"`prob`\" defaults to 1.0, which means all words will be transformed.\n", + "```\n", + "harness.configure(\n", + "{\n", + " 'tests': {\n", + " 'defaults': {'min_pass_rate': 0.65},\n", + " 'robustness': {\n", + " 'lowercase': {'min_pass_rate': 0.66, 'prob': 0.50}, \n", + " 'uppercase':{'min_pass_rate': 0.60, 'prob': 0.70},\n", + " }\n", + " }\n", + "})\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "harness.data = {k: v[:10] for k, v in harness.data.items()}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "i6kPvA13F7cr" + }, + "source": [ + "\n", + "### Generating the test cases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mdNH3wCKF9fn", + "outputId": "cd348490-7ade-40fa-d870-dc059f5aa647" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + " BoolQ \n", + "================================================================================\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 995.80it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "\n", + "================================================================================\n", + " NQ-open \n", + "================================================================================\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating testcases...: 100%|██████████| 1/1 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorydataset_nametest_typeoriginal_contextoriginal_questionperturbed_contextperturbed_question
0robustnessBoolQuppercase20 euro note -- Until now there has been only ...is the first series 20 euro note still legal t...20 EURO NOTE -- UNTIL NOW THERE HAS BEEN ONLY ...IS THE FIRST SERIES 20 EURO NOTE STILL LEGAL T...
1robustnessBoolQuppercase2018–19 UEFA Champions League -- The final wil...do the champions league winners get automatic ...2018–19 UEFA CHAMPIONS LEAGUE -- THE FINAL WIL...DO THE CHAMPIONS LEAGUE WINNERS GET AUTOMATIC ...
2robustnessBoolQuppercaseBullsnake -- Bullsnakes are very powerful cons...can a bull snake kill a small dogBULLSNAKE -- BULLSNAKES ARE VERY POWERFUL CONS...CAN A BULL SNAKE KILL A SMALL DOG
3robustnessBoolQuppercaseNBA playoffs -- All rounds are best-of-seven s...are all nba playoff games best of 7NBA PLAYOFFS -- ALL ROUNDS ARE BEST-OF-SEVEN S...ARE ALL NBA PLAYOFF GAMES BEST OF 7
4robustnessBoolQuppercaseManchester station group -- The Manchester sta...can i use my train ticket on the tram in manch...MANCHESTER STATION GROUP -- THE MANCHESTER STA...CAN I USE MY TRAIN TICKET ON THE TRAM IN MANCH...
........................
190robustnessNQ-openadd_typo-who has the most followers on the twitter-who has the most followers on tme twitter
191robustnessNQ-openadd_typo-who said it's not what your country can do for...-who said it's not what your country can do for...
192robustnessNQ-openadd_typo-when does lil wayne new album drop 2018-jhen does lil wayne new album drop 2018
193robustnessNQ-openadd_typo-the khajuraho temples are especially well know...-the khajuraho temples are rspecially well know...
194robustnessNQ-openadd_typo-when does the regular nba basketball season start-when does the regular nba basuetball season start
\n", + "

195 rows × 7 columns

\n", + "" + ], + "text/plain": [ + " category dataset_name test_type \\\n", + "0 robustness BoolQ uppercase \n", + "1 robustness BoolQ uppercase \n", + "2 robustness BoolQ uppercase \n", + "3 robustness BoolQ uppercase \n", + "4 robustness BoolQ uppercase \n", + ".. ... ... ... \n", + "190 robustness NQ-open add_typo \n", + "191 robustness NQ-open add_typo \n", + "192 robustness NQ-open add_typo \n", + "193 robustness NQ-open add_typo \n", + "194 robustness NQ-open add_typo \n", + "\n", + " original_context \\\n", + "0 20 euro note -- Until now there has been only ... \n", + "1 2018–19 UEFA Champions League -- The final wil... \n", + "2 Bullsnake -- Bullsnakes are very powerful cons... \n", + "3 NBA playoffs -- All rounds are best-of-seven s... \n", + "4 Manchester station group -- The Manchester sta... \n", + ".. ... \n", + "190 - \n", + "191 - \n", + "192 - \n", + "193 - \n", + "194 - \n", + "\n", + " original_question \\\n", + "0 is the first series 20 euro note still legal t... \n", + "1 do the champions league winners get automatic ... \n", + "2 can a bull snake kill a small dog \n", + "3 are all nba playoff games best of 7 \n", + "4 can i use my train ticket on the tram in manch... \n", + ".. ... \n", + "190 who has the most followers on the twitter \n", + "191 who said it's not what your country can do for... \n", + "192 when does lil wayne new album drop 2018 \n", + "193 the khajuraho temples are especially well know... \n", + "194 when does the regular nba basketball season start \n", + "\n", + " perturbed_context \\\n", + "0 20 EURO NOTE -- UNTIL NOW THERE HAS BEEN ONLY ... \n", + "1 2018–19 UEFA CHAMPIONS LEAGUE -- THE FINAL WIL... \n", + "2 BULLSNAKE -- BULLSNAKES ARE VERY POWERFUL CONS... \n", + "3 NBA PLAYOFFS -- ALL ROUNDS ARE BEST-OF-SEVEN S... \n", + "4 MANCHESTER STATION GROUP -- THE MANCHESTER STA... \n", + ".. ... 
\n", + "190 - \n", + "191 - \n", + "192 - \n", + "193 - \n", + "194 - \n", + "\n", + " perturbed_question \n", + "0 IS THE FIRST SERIES 20 EURO NOTE STILL LEGAL T... \n", + "1 DO THE CHAMPIONS LEAGUE WINNERS GET AUTOMATIC ... \n", + "2 CAN A BULL SNAKE KILL A SMALL DOG \n", + "3 ARE ALL NBA PLAYOFF GAMES BEST OF 7 \n", + "4 CAN I USE MY TRAIN TICKET ON THE TRAM IN MANCH... \n", + ".. ... \n", + "190 who has the most followers on tme twitter \n", + "191 who said it's not what your country can do for... \n", + "192 jhen does lil wayne new album drop 2018 \n", + "193 the khajuraho temples are rspecially well know... \n", + "194 when does the regular nba basuetball season start \n", + "\n", + "[195 rows x 7 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.testcases()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "NOJ8BAU2GGzd" + }, + "source": [ + "harness.testcases() method displays the produced test cases in form of a pandas data frame." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "3CwhQw6hGR9S" + }, + "source": [ + "### Running the tests" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aguX6-aFGOnP", + "outputId": "bb014811-522b-4f07-fa8a-bf3d1c906d7f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + " BoolQ \n", + "================================================================================\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running testcases... 
: 100%|██████████| 100/100 [01:19<00:00, 1.26it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "\n", + "================================================================================\n", + " NQ-open \n", + "================================================================================\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running testcases... : 100%|██████████| 95/95 [01:47<00:00, 1.13s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.run()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "191O2oaUGWrH" + }, + "source": [ + "Called after harness.generate() and is used to run all the tests. Returns a pass/fail flag for each test." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 797 + }, + "id": "-cXkdnihGYke", + "outputId": "2aa88caa-5e83-44fe-b3aa-b81b9ae9115a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorydataset_nametest_typeoriginal_contextoriginal_questionperturbed_contextperturbed_questionexpected_resultactual_resultpass
0robustnessBoolQuppercase20 euro note -- Until now there has been only ...is the first series 20 euro note still legal t...20 EURO NOTE -- UNTIL NOW THERE HAS BEEN ONLY ...IS THE FIRST SERIES 20 EURO NOTE STILL LEGAL T...\\nFalse\\nFalseTrue
1robustnessBoolQuppercase2018–19 UEFA Champions League -- The final wil...do the champions league winners get automatic ...2018–19 UEFA CHAMPIONS LEAGUE -- THE FINAL WIL...DO THE CHAMPIONS LEAGUE WINNERS GET AUTOMATIC ...\\nTrue\\nTrueTrue
2robustnessBoolQuppercaseBullsnake -- Bullsnakes are very powerful cons...can a bull snake kill a small dogBULLSNAKE -- BULLSNAKES ARE VERY POWERFUL CONS...CAN A BULL SNAKE KILL A SMALL DOG\\nFalse\\nFalseTrue
3robustnessBoolQuppercaseNBA playoffs -- All rounds are best-of-seven s...are all nba playoff games best of 7NBA PLAYOFFS -- ALL ROUNDS ARE BEST-OF-SEVEN S...ARE ALL NBA PLAYOFF GAMES BEST OF 7\\nTrueTrueTrue
4robustnessBoolQuppercaseManchester station group -- The Manchester sta...can i use my train ticket on the tram in manch...MANCHESTER STATION GROUP -- THE MANCHESTER STA...CAN I USE MY TRAIN TICKET ON THE TRAM IN MANCH...\\nTrue\\nFalseFalse
.................................
190robustnessNQ-openadd_typo-who has the most followers on the twitter-who has the most followers on tme twitter?\\n\\nAs of 2021, the person with the most foll...?\\n\\nAs of June 2021, the account with the mos...True
191robustnessNQ-openadd_typo-who said it's not what your country can do for...-who said it's not what your country can do for...?\\n\\nJohn F. Kennedy?\\n\\n\\nJohn F. KennedyTrue
192robustnessNQ-openadd_typo-when does lil wayne new album drop 2018-jhen does lil wayne new album drop 2018\\nLil Wayne's album, \"Tha Carter V,\" was relea...?\\n\\nThere is no official release date for Lil...False
193robustnessNQ-openadd_typo-the khajuraho temples are especially well know...-the khajuraho temples are rspecially well know...\\nerotic sculptures?\\n\\nerotic sculptures.True
194robustnessNQ-openadd_typo-when does the regular nba basketball season start-when does the regular nba basuetball season start?\\nThe regular NBA basketball season typically...?\\n\\nThe regular NBA basketball season typical...True
\n", + "

195 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " category dataset_name test_type \\\n", + "0 robustness BoolQ uppercase \n", + "1 robustness BoolQ uppercase \n", + "2 robustness BoolQ uppercase \n", + "3 robustness BoolQ uppercase \n", + "4 robustness BoolQ uppercase \n", + ".. ... ... ... \n", + "190 robustness NQ-open add_typo \n", + "191 robustness NQ-open add_typo \n", + "192 robustness NQ-open add_typo \n", + "193 robustness NQ-open add_typo \n", + "194 robustness NQ-open add_typo \n", + "\n", + " original_context \\\n", + "0 20 euro note -- Until now there has been only ... \n", + "1 2018–19 UEFA Champions League -- The final wil... \n", + "2 Bullsnake -- Bullsnakes are very powerful cons... \n", + "3 NBA playoffs -- All rounds are best-of-seven s... \n", + "4 Manchester station group -- The Manchester sta... \n", + ".. ... \n", + "190 - \n", + "191 - \n", + "192 - \n", + "193 - \n", + "194 - \n", + "\n", + " original_question \\\n", + "0 is the first series 20 euro note still legal t... \n", + "1 do the champions league winners get automatic ... \n", + "2 can a bull snake kill a small dog \n", + "3 are all nba playoff games best of 7 \n", + "4 can i use my train ticket on the tram in manch... \n", + ".. ... \n", + "190 who has the most followers on the twitter \n", + "191 who said it's not what your country can do for... \n", + "192 when does lil wayne new album drop 2018 \n", + "193 the khajuraho temples are especially well know... \n", + "194 when does the regular nba basketball season start \n", + "\n", + " perturbed_context \\\n", + "0 20 EURO NOTE -- UNTIL NOW THERE HAS BEEN ONLY ... \n", + "1 2018–19 UEFA CHAMPIONS LEAGUE -- THE FINAL WIL... \n", + "2 BULLSNAKE -- BULLSNAKES ARE VERY POWERFUL CONS... \n", + "3 NBA PLAYOFFS -- ALL ROUNDS ARE BEST-OF-SEVEN S... \n", + "4 MANCHESTER STATION GROUP -- THE MANCHESTER STA... \n", + ".. ... 
\n", + "190 - \n", + "191 - \n", + "192 - \n", + "193 - \n", + "194 - \n", + "\n", + " perturbed_question \\\n", + "0 IS THE FIRST SERIES 20 EURO NOTE STILL LEGAL T... \n", + "1 DO THE CHAMPIONS LEAGUE WINNERS GET AUTOMATIC ... \n", + "2 CAN A BULL SNAKE KILL A SMALL DOG \n", + "3 ARE ALL NBA PLAYOFF GAMES BEST OF 7 \n", + "4 CAN I USE MY TRAIN TICKET ON THE TRAM IN MANCH... \n", + ".. ... \n", + "190 who has the most followers on tme twitter \n", + "191 who said it's not what your country can do for... \n", + "192 jhen does lil wayne new album drop 2018 \n", + "193 the khajuraho temples are rspecially well know... \n", + "194 when does the regular nba basuetball season start \n", + "\n", + " expected_result \\\n", + "0 \\nFalse \n", + "1 \\nTrue \n", + "2 \\nFalse \n", + "3 \\nTrue \n", + "4 \\nTrue \n", + ".. ... \n", + "190 ?\\n\\nAs of 2021, the person with the most foll... \n", + "191 ?\\n\\nJohn F. Kennedy \n", + "192 \\nLil Wayne's album, \"Tha Carter V,\" was relea... \n", + "193 \\nerotic sculptures \n", + "194 ?\\nThe regular NBA basketball season typically... \n", + "\n", + " actual_result pass \n", + "0 \\nFalse True \n", + "1 \\nTrue True \n", + "2 \\nFalse True \n", + "3 True True \n", + "4 \\nFalse False \n", + ".. ... ... \n", + "190 ?\\n\\nAs of June 2021, the account with the mos... True \n", + "191 ?\\n\\n\\nJohn F. Kennedy True \n", + "192 ?\\n\\nThere is no official release date for Lil... False \n", + "193 ?\\n\\nerotic sculptures. True \n", + "194 ?\\n\\nThe regular NBA basketball season typical... 
True \n", + "\n", + "[195 rows x 10 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.generated_results()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TKB8Rsr2GZME" + }, + "source": [ + "This method returns the generated results in the form of a pandas dataframe, which provides a convenient and easy-to-use format for working with the test results. You can use this method to quickly identify the test cases that failed and to determine where fixes are needed." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "PBSlpWnUU55G" + }, + "source": [ + "### Final Results" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can call `.report()` which summarizes the results giving information about pass and fail counts and overall test pass/fail flag." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "gp57HcF9yxi7", + "outputId": "b893072f-102a-45a6-be03-d737996e659c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Benchmarking Results: gpt-3.5-turbo-instruct
fail_countpass_countpass_rateminimum_pass_ratepass
dataset_namecategorytest_type
BoolQrobustnessuppercase143672%80%False
add_typo84284%80%True
NQ-openrobustnessuppercase94182%80%True
add_typo103578%80%False
\n", + "
" + ], + "text/plain": [ + " Benchmarking Results: gpt-3.5-turbo-instruct \\\n", + " fail_count \n", + "dataset_name category test_type \n", + "BoolQ robustness uppercase 14 \n", + " add_typo 8 \n", + "NQ-open robustness uppercase 9 \n", + " add_typo 10 \n", + "\n", + " \\\n", + " pass_count pass_rate minimum_pass_rate \n", + "dataset_name category test_type \n", + "BoolQ robustness uppercase 36 72% 80% \n", + " add_typo 42 84% 80% \n", + "NQ-open robustness uppercase 41 82% 80% \n", + " add_typo 35 78% 80% \n", + "\n", + " \n", + " pass \n", + "dataset_name category test_type \n", + "BoolQ robustness uppercase False \n", + " add_typo True \n", + "NQ-open robustness uppercase True \n", + " add_typo False " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "harness.report()" + ] + } + ], + "metadata": { + "accelerator": "TPU", + "colab": { + "machine_shape": "hm", + "provenance": [], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/langtest/prompts.py b/langtest/prompts.py index 870b451c4..b9df1d28e 100644 --- a/langtest/prompts.py +++ b/langtest/prompts.py @@ -33,12 +33,11 @@ def get_template(self): temp = [] order_less = [] - for field in self.__field_order: - formatted = f"{field.title()}: {{{field}}}" - if field in self.__dict__: - temp.append(formatted) + for field in self.__dict__: + if field in self.__field_order: + temp.append(f"{field.title()}: {{{field}}}") else: - order_less.append(formatted) + order_less.append(f"{field.title()}: {{{field}}}") if order_less: temp.extend(order_less) return "\n" + "\n".join(temp) 
@@ -175,6 +174,7 @@ def get_prompt(self, hub=None): return self.prompt_style() def get_shot_prompt(self): + print(self.get_examples) return f"{len(self.get_examples)}-shot prompt" def lm_studio_prompt(self): From e7e94a219ce68fa4d28b885365b4bb1ac0e610e0 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Sat, 11 May 2024 13:13:56 +0530 Subject: [PATCH 16/16] fix lint and format issue --- langtest/langtest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 2b9758b05..021d1c77a 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -1665,12 +1665,11 @@ def __reset_defaults(self): """Reset the default values.""" model_response = TestResultManager() model_response.clear_data() - + # Reset the PromptManager prompt_manager = PromptManager() prompt_manager.reset() - def __tracking(self, *args, **kwargs): """Track the progress of the testcases.""" if self.__benchmarking: