From 1a775c69c018b9f2da487ca0b58e7503b9e984c8 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Tue, 4 Jul 2023 21:55:53 -0400 Subject: [PATCH 01/44] add support for 5.3.0 --- openvalidators/neuron.py | 6 +++++- requirements.txt | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index 227d2c8..5711144 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -84,7 +84,11 @@ def __init__(self): self.wallet = bt.wallet(config=self.config) self.wallet.create_if_non_existent() if not self.config.wallet._mock: - self.wallet.reregister(subtensor=self.subtensor, netuid=self.config.netuid) + bt.utils.reregister( + wallet = self.wallet, + subtensor=self.subtensor, + netuid=self.config.netuid + ) bt.logging.debug(str(self.wallet)) # Init metagraph. diff --git a/requirements.txt b/requirements.txt index 18de33a..f460d67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -bittensor==5.2.0 +bittensor==5.3.0 transformers<=4.28.0 wandb==0.15.3 datasets==2.12.0 From de734f3c580ab313107b14058759f3e07f7abc81 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Wed, 5 Jul 2023 15:15:01 -0400 Subject: [PATCH 02/44] add requirement for bittensor_wallet --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f460d67..3afcb0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ click==8.1.3 torchmetrics sentencepiece numpy==1.21.6 +bittensor_wallet==0.0.2 From 7b0362735f490d9e2046dcc9e5c1e3757ad5f1df Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Wed, 5 Jul 2023 15:15:10 -0400 Subject: [PATCH 03/44] add test helpers --- tests/helpers/__init__.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/helpers/__init__.py diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 100644 index 0000000..3ceb08d --- /dev/null +++ b/tests/helpers/__init__.py @@ -0,0 +1,25 @@ +# The MIT License (MIT) +# Copyright © 2023 Opentensor Technologies + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from bittensor_wallet.mock import MockWallet as _MockWallet, utils as _mock_wallet_utils + +def __mock_wallet_factory__(*args, **kwargs) -> _MockWallet: + """Returns a mock wallet object.""" + + mock_wallet = _mock_wallet_utils.get_mock_wallet() + + return mock_wallet \ No newline at end of file From e3a3163c3f6bb18829603c9ca72bb41ce0b1c61b Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Wed, 5 Jul 2023 15:15:21 -0400 Subject: [PATCH 04/44] patch wallet factory --- tests/test_weights.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_weights.py b/tests/test_weights.py index 933e6a0..6fb5c6b 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -20,13 +20,30 @@ import sys from openvalidators.neuron import neuron as Neuron from openvalidators.forward import run_step -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch + +from .helpers import __mock_wallet_factory__ CLI_ARGS_STR = "validators/openvalidators/neuron.py --mock --wallet._mock --wandb.off --neuron.followup_sample_size 10 --neuron.answer_sample_size 10" SYS_ARGV = sys.argv.copy() +patcher = None + +def setUpModule(): + """Runs once for the tests in this module.""" + global patcher + patcher = patch("bittensor.wallet.__new__", __mock_wallet_factory__ ) + patcher.start() + +def tearDownModule(): + """Runs once for the tests in this module.""" + global patcher + if patcher: + patcher.stop() + + def test_uid_weights_unchanged_unless_queried(n_steps=10, n_concurrent=1): """Test that the weights of unqueried uids do not over the course of a forward pass.""" From 9a91beaa3c9ab67351410ecf7bf3a31271fa9c5f Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Thu, 6 Jul 2023 17:23:33 -0400 Subject: [PATCH 05/44] fix wandb weights logging --- openvalidators/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/utils.py b/openvalidators/utils.py index 341fbde..9be9165 100644 --- a/openvalidators/utils.py +++ b/openvalidators/utils.py @@ -182,7 +182,7 @@ def save_state(self): bt.logging.info("save_state()") try: neuron_state_dict = { - "neuron_weights": self.moving_averaged_scores, + "neuron_weights": self.moving_averaged_scores.to('cpu').tolist(), "neuron_hotkeys": self.hotkeys, } torch.save(neuron_state_dict, f"{self.config.neuron.full_path}/model.torch") @@ -202,7 +202,7 @@ def save_state(self): "step": self.step, "block": ttl_get_block(self), **neuron_state_dict - }) + }) if not self.config.wandb.off and self.config.wandb.track_gating_model: model_artifact = wandb.Artifact(f"{gating_model_name}_gating_linear_layer", type="model") model_artifact.add_file(gating_model_file_path) From b6adb580cc01ee56f0763b8967472c26feb3a2df Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Tue, 11 Jul 2023 17:51:22 -0400 Subject: [PATCH 06/44] bump reqs --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3afcb0f..edda786 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -bittensor==5.3.0 +bittensor==5.3.1 transformers<=4.28.0 wandb==0.15.3 datasets==2.12.0 @@ -10,4 +10,4 @@ click==8.1.3 torchmetrics sentencepiece numpy==1.21.6 -bittensor_wallet==0.0.2 +bittensor_wallet==0.0.4 From beb5e84e909a556ea4175f93622e3e7b498d918e Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Tue, 11 Jul 2023 17:54:25 -0400 Subject: [PATCH 07/44] fix test helpers --- tests/helpers/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 3ceb08d..ea6e2eb 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -17,9 +17,15 @@ from bittensor_wallet.mock import MockWallet as _MockWallet, utils as _mock_wallet_utils +_get_mock_coldkey = _mock_wallet_utils.get_mock_coldkey +_get_mock_hotkey = _mock_wallet_utils.get_mock_hotkey +_get_mock_keypair = _mock_wallet_utils.get_mock_keypair +_get_mock_wallet = _mock_wallet_utils.get_mock_wallet + + def __mock_wallet_factory__(*args, **kwargs) -> _MockWallet: """Returns a mock wallet object.""" - mock_wallet = _mock_wallet_utils.get_mock_wallet() + mock_wallet = _get_mock_wallet() - return mock_wallet \ No newline at end of file + return mock_wallet From 6d1a0012e1a0754f6dbb17206a97ac29f2b23716 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Wed, 12 Jul 2023 14:27:46 -0400 Subject: [PATCH 08/44] add netuid tag to wandb --- openvalidators/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openvalidators/utils.py b/openvalidators/utils.py index 341fbde..e712873 100644 --- a/openvalidators/utils.py +++ b/openvalidators/utils.py @@ -32,7 +32,11 @@ def should_reinit_wandb(self): def init_wandb(self, reinit=False): """Starts a new wandb run.""" - tags = [self.wallet.hotkey.ss58_address, openvalidators.__version__, str(openvalidators.__spec_version__)] + tags = [self.wallet.hotkey.ss58_address, + openvalidators.__version__, + str(openvalidators.__spec_version__), + f'netuid_{self.metagraph.netuid}'] + if self.config.mock: tags.append("mock") if self.config.neuron.use_custom_gating_model: From 86b99344a2c534f0cf3200acca5f38655d55438f Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 13 Jul 2023 11:28:06 -0700 Subject: [PATCH 09/44] empty cache after saving --- openvalidators/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openvalidators/utils.py b/openvalidators/utils.py index 341fbde..5eca03e 100644 --- a/openvalidators/utils.py +++ b/openvalidators/utils.py @@ -209,6 +209,10 @@ def save_state(self): self.wandb.log_artifact(model_artifact) bt.logging.success(prefix="Saved gating model", sufix=f"{gating_model_file_path}") + + #empty cache + torch.cuda.empty_cache() + except Exception as e: bt.logging.warning(f"Failed to save model with error: {e}") From 96de39ef671a910f87f73cc9e041a535af0a5e60 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Sat, 15 Jul 2023 09:41:34 -0400 Subject: [PATCH 10/44] Use subtensor for meta sync (#79) * add type hints * dont sync on meta init * sync using subtensor obj --- openvalidators/neuron.py | 7 ++++++- openvalidators/utils.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index 227d2c8..8944767 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -62,6 +62,10 @@ def config(cls): def run(self): run(self) + subtensor: "bt.subtensor" + wallet: "bt.wallet" + metagraph: "bt.metagraph" + def __init__(self): self.config = neuron.config() self.check_config(self.config) @@ -89,7 +93,8 @@ def __init__(self): # Init metagraph. bt.logging.debug("loading", "metagraph") - self.metagraph = bt.metagraph(netuid=self.config.netuid, network=self.subtensor.network) + self.metagraph = bt.metagraph(netuid=self.config.netuid, network=self.subtensor.network, sync=False) # Make sure not to sync without passing subtensor + self.metagraph.sync(subtensor=self.subtensor) # Sync metagraph with subtensor. self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) bt.logging.debug(str(self.metagraph)) diff --git a/openvalidators/utils.py b/openvalidators/utils.py index 2d72432..5c4b90c 100644 --- a/openvalidators/utils.py +++ b/openvalidators/utils.py @@ -87,7 +87,7 @@ def checkpoint(self): save_state(self) -def resync_metagraph(self): +def resync_metagraph(self: 'openvalidators.neuron.neuron'): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" bt.logging.info("resync_metagraph()") @@ -95,7 +95,7 @@ def resync_metagraph(self): previous_metagraph = copy.deepcopy(self.metagraph) # Sync the metagraph. - self.metagraph.sync() + self.metagraph.sync(subtensor=self.subtensor) # Check if the metagraph axon info has changed. metagraph_axon_info_updated = previous_metagraph.axons != self.metagraph.axons From b11c379a1d9eceddc76e6998143bbcba7de05b29 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 20 Jul 2023 15:44:35 -0700 Subject: [PATCH 11/44] increase count limit --- openvalidators/reward/reward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 570b7bf..292d5d6 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -35,7 +35,7 @@ def __init__(self) -> None: self.count = 0 self.mean = 0.0 self.var = 0.0 - self.count_limit = 1000 + self.count_limit = 3000 def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: """ From 5531e8e892cb64f7098be0f6643b2daac3bc755b Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 24 Jul 2023 08:51:27 -0700 Subject: [PATCH 12/44] comments --- openvalidators/forward.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 83faa51..376fc8c 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -52,15 +52,13 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor avail_uids.append(uid) if uid_is_not_excluded: candidate_uids.append(uid) - + # Check if candidate_uids contain enough for querying, if not grab all avaliable uids - if len(candidate_uids) > k: - available_uids = torch.tensor(candidate_uids, dtype=torch.int64).to(self.device) - else: - available_uids = torch.tensor(avail_uids, dtype=torch.int64).to(self.device) - + available_uids = candidate_uids + if len(candidate_uids) < k: + available_uids += random.sample([uid for uid in avail_uids if uid not in candidate_uids], k-len(candidate_uids)) - uids = torch.tensor(random.sample(available_uids.tolist(), k), dtype=torch.int64) + uids = torch.tensor(random.sample(available_uids, k), dtype=torch.int64) return uids From af4d0e9231aed240206925f25fcebec49f038141 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Tue, 25 Jul 2023 20:08:25 -0400 Subject: [PATCH 13/44] make reqs looser --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index edda786..17d6b03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -bittensor==5.3.1 -transformers<=4.28.0 +bittensor>=5.3.2,<6.0.0 +transformers<=4.28.0 wandb==0.15.3 datasets==2.12.0 plotly==5.14.1 @@ -10,4 +10,4 @@ click==8.1.3 torchmetrics sentencepiece numpy==1.21.6 -bittensor_wallet==0.0.4 +bittensor_wallet==0.0.5 From bacd9ee54224acd678839be8ceaba71fb2969992 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Wed, 26 Jul 2023 18:35:58 -0400 Subject: [PATCH 14/44] pin below major instead --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 17d6b03..23c892f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ click==8.1.3 torchmetrics sentencepiece numpy==1.21.6 -bittensor_wallet==0.0.5 +bittensor-wallet>=0.0.4,<1.0.0 From 027044b4db55466c4ff9805e4a8ff3882753c614 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 26 Jul 2023 23:39:22 +0000 Subject: [PATCH 15/44] bump (and loosen) datsets to fix python compat --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 18de33a..dcaddb0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ bittensor==5.2.0 transformers<=4.28.0 wandb==0.15.3 -datasets==2.12.0 +datasets>=2.14.0 plotly==5.14.1 networkx==3.1 scipy==1.10.1 From 5c74113792b63c5a26cf599894e55f43ba725023 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 26 Jul 2023 23:44:54 +0000 Subject: [PATCH 16/44] add bittensor bump hotfix for this issue --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dcaddb0..f2d3030 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -bittensor==5.2.0 +bittensor==5.2.1 transformers<=4.28.0 wandb==0.15.3 datasets>=2.14.0 From 1867dad0e31ed876794db1fa017d91d7013618e0 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 26 Jul 2023 23:59:42 +0000 Subject: [PATCH 17/44] bump validators version and pin correct bittensor --- openvalidators/__init__.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/__init__.py b/openvalidators/__init__.py index 703e0e8..d2bcf5a 100644 --- a/openvalidators/__init__.py +++ b/openvalidators/__init__.py @@ -28,6 +28,6 @@ from . import weights from . import event -__version__ = "1.1.2" +__version__ = "1.1.3" version_split = __version__.split(".") __spec_version__ = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) diff --git a/requirements.txt b/requirements.txt index f2d3030..4ecc1d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -bittensor==5.2.1 +bittensor>=5.2.1,<5.3.0 transformers<=4.28.0 wandb==0.15.3 datasets>=2.14.0 From 9deeb9125e9fa068f60f353e51554bc14dd40125 Mon Sep 17 00:00:00 2001 From: Julius ter Pelkwijk <1099127+mrseeker@users.noreply.github.com> Date: Thu, 27 Jul 2023 14:46:06 +0200 Subject: [PATCH 18/44] fixing spelling --- openvalidators/prompts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/openvalidators/prompts.py b/openvalidators/prompts.py index 61e59bf..3cb451a 100644 --- a/openvalidators/prompts.py +++ b/openvalidators/prompts.py @@ -124,7 +124,7 @@ def find_unique_tags(input_text: str): # Request a follow-up question given a preceding context. -followup_request_template = "Ask one relevant and insightful question about the preceding context" +followup_request_template = "Ask a single relevant and insightful question about the preceding context" # Scores a summary on a scale from 0 to 10, given a context. augment_scoring_template = """Score the relevance, succinctness, and quality of a summary given a context. The context is within tags, and the question is within tags. Give a score between 0 and 10 in the tags, where 0 means the summary is irrelevant, and 10 means it's perfectly relevant and a good summary. Include a brief explanation for your score based solely on the context-summary relationship. @@ -348,16 +348,16 @@ def find_unique_tags(input_text: str): def followup_prompt( base_text:str, i:int = 0) -> str: if i == 0: - return f"{base_text}\n\n{followup_request_template}\n" + return f"{base_text}\n\n{followup_request_template}. Do not return an answer:\n" else: - return f"{base_text}\n\n{followup_request_template} and previous questions\n" + return f"{base_text}\n\n{followup_request_template} and previous questions. Do not return an answer:\n" def answer_prompt( base_text:str, followup:str ) -> str: - return f"{base_text}\n Question:{followup}\n Answer the question step by step and explain your thoughts" + return f"{base_text}\n\nQuestion: {followup}\nAnswer the last question step by step and explain your thoughts:\n" augment_request_template = "Summarize the preceding context" def augment_prompt( base_text:str ) -> str: random_level = random.randint(4, 8) - return f"{base_text}\n\n{augment_request_template} in {random_level} sentences.\n\n" \ No newline at end of file + return f"{base_text}\n\n{augment_request_template} in {random_level} sentences.\n" \ No newline at end of file From 288d76f1b312852959c28890fd8104ebf498ca54 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Fri, 28 Jul 2023 15:55:50 +0000 Subject: [PATCH 19/44] added historic diversity --- __init__.py | 0 openvalidators/reward/diversity.py | 60 +++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 185c53b..cd0f719 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -56,6 +56,10 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) + self.history_reward_quantile = torch.tensor(0.003).to(self.device) + self.historic_embeddings = torch.tensor([]).to(self.device) + self.history_size = 1000 + def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -86,6 +90,50 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) return sentence_embeddings + def update_historic_embeddings( self, embeddings: torch.FloatTensor ): + def unique(embeddings): + unique_embeddings = [embeddings[0]] + last_emb = embeddings[0] + for emb in embeddings: + if not torch.all(torch.eq(emb, last_emb)): + unique_embeddings.append(emb) + last_emb = emb + return torch.stack(unique_embeddings) + + embeddings_unique = unique(embeddings) + historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) + self.historic_embeddings = historic_embeddings[-self.history_size:, :] + + def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + def regularise( rewards ): + # sigmoid function that does the following mapping approximately + # 0.1 -> 0.05 + # 0.15 -> 0.4 + # 0.2 -> 0.9 + # >0.3 -> 1 + return 1/(1 + torch.exp(-50 * rewards + 8)) + + # Return None if history size is too small + if self.historic_embeddings.shape[0] < self.history_size: + return None + + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings ) + + # Reward to be at the 10% quantile of the 1 - similarity score. + rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) + + return regularise(rewards) + + def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, embeddings ) + + # Reward to be at the 10% quantile of the 1 - similarity score. + rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 ) + + return rewards + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: # Check if completions are empty, return 0 if so @@ -95,11 +143,13 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch # Get embeddings for all completions. embeddings = self.get_embeddings( completions ) - # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, embeddings ) + # Get batch rewards. + batch_rewards = self.get_batch_rewards(embeddings) - # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 ) + # get historic rewards. + historic_rewards = self.get_historic_rewards(embeddings) + self.update_historic_embeddings(embeddings) + # Return all - return rewards \ No newline at end of file + return batch_rewards * historic_rewards \ No newline at end of file From b287c76972dc44f84c9fedafc6aceadae0ba03b3 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Fri, 28 Jul 2023 16:54:18 +0000 Subject: [PATCH 20/44] fix --- openvalidators/reward/diversity.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index cd0f719..23e04fd 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -152,4 +152,7 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch self.update_historic_embeddings(embeddings) # Return all - return batch_rewards * historic_rewards \ No newline at end of file + if historic_rewards != None: + return batch_rewards * historic_rewards + else: + return batch_rewards \ No newline at end of file From d7596beab92fc2d15060907fa714f58cdfe59e5d Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:33:57 -0400 Subject: [PATCH 21/44] adds task_validator to masking pipeline --- openvalidators/config.py | 6 +++ openvalidators/event.py | 2 + openvalidators/neuron.py | 32 +++++++++----- openvalidators/reward/__init__.py | 3 +- openvalidators/reward/config.py | 1 + openvalidators/reward/task_validator.py | 59 +++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 openvalidators/reward/task_validator.py diff --git a/openvalidators/config.py b/openvalidators/config.py index 6565cef..d773032 100644 --- a/openvalidators/config.py +++ b/openvalidators/config.py @@ -241,6 +241,12 @@ def add_args(cls, parser): action="store_true", help="Dont apply the diversity reward model", default=False, + ) + parser.add_argument( + "--neuron.task_validator_off", + action="store_true", + help="Dont apply the task validator reward model", + default=False, ) parser.add_argument( diff --git a/openvalidators/event.py b/openvalidators/event.py index d790318..8aaec40 100644 --- a/openvalidators/event.py +++ b/openvalidators/event.py @@ -44,6 +44,7 @@ class EventSchema: rlhf_reward_model: Optional[List[float]] # Output vector of the rlhf reward model prompt_reward_model: Optional[List[float]] # Output vector of the prompt reward model relevance_filter: Optional[List[float]] # Output vector of the relevance scoring reward model + task_validator_filter: Optional[List[float]] # Weights data set_weights: Optional[List[List[float]]] @@ -54,6 +55,7 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> 'EventSchema': rewards = { 'dahoas_reward_model': event_dict.get(RewardModelType.dahoas.value), 'blacklist_filter': event_dict.get(RewardModelType.blacklist.value), + 'task_validator_filter': event_dict.get(RewardModelType.task_validator.value), 'nsfw_filter': event_dict.get(RewardModelType.nsfw.value), 'relevance_filter': event_dict.get(RewardModelType.relevance.value), 'reciprocate_reward_model': event_dict.get(RewardModelType.reciprocate.value), diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index 0534a5d..cbd61cb 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -34,6 +34,7 @@ # Load gating models from openvalidators.reward import ( Blacklist, + TaskValidator, NSFWRewardModel, OpenAssistantRewardModel, ReciprocateRewardModel, @@ -180,24 +181,31 @@ def __init__(self): bt.logging.error(message) raise Exception(message) - + + # Masking functions self.blacklist = ( Blacklist() if not self.config.neuron.blacklist_off else MockRewardModel(RewardModelType.blacklist.value) ) - - self.masking_functions = [ - self.blacklist, - RelevanceRewardModel(device=self.device) - if not self.config.neuron.relevance_off + task_validator = ( + TaskValidator() if not self.config.neuron.task_validator_off + else MockRewardModel(RewardModelType.task_validator.value), + ) + relevance_model = ( + RelevanceRewardModel(device=self.device) if not self.config.neuron.relevance_off else MockRewardModel(RewardModelType.relevance.value), - DiversityRewardModel(device=self.device) - if not self.config.neuron.diversity_off + ) + diversity_model = ( + DiversityRewardModel(device=self.device) if not self.config.neuron.diversity_off else MockRewardModel(RewardModelType.diversity.value), - NSFWRewardModel(device=self.device) - if not self.config.neuron.nsfw_off - else MockRewardModel(RewardModelType.nsfw.value), - ] + ) + nsfw_model = ( + NSFWRewardModel(device=self.device) if not self.config.neuron.nsfw_off + else MockRewardModel(RewardModelType.nsfw.value), + ) + + self.masking_functions = [self.blacklist, task_validator, relevance_model, diversity_model, nsfw_model] bt.logging.debug(str(self.reward_functions)) + bt.logging.debug(str(self.masking_functions)) # Init the event loop. self.loop = asyncio.get_event_loop() diff --git a/openvalidators/reward/__init__.py b/openvalidators/reward/__init__.py index 3277b6e..c330866 100644 --- a/openvalidators/reward/__init__.py +++ b/openvalidators/reward/__init__.py @@ -1,4 +1,5 @@ from .blacklist import Blacklist +from .task_validator import TaskValidator from .nsfw import NSFWRewardModel from .open_assistant import OpenAssistantRewardModel from .reciprocate import ReciprocateRewardModel @@ -8,4 +9,4 @@ from .dahoas import DahoasRewardModel from .diversity import DiversityRewardModel from .prompt import PromptRewardModel -from .config import RewardModelType, DefaultRewardFrameworkConfig +from .config import RewardModelType, DefaultRewardFrameworkConfig \ No newline at end of file diff --git a/openvalidators/reward/config.py b/openvalidators/reward/config.py index 2f4b63b..86354cb 100644 --- a/openvalidators/reward/config.py +++ b/openvalidators/reward/config.py @@ -28,6 +28,7 @@ class RewardModelType(Enum): blacklist = 'blacklist_filter' nsfw = 'nsfw_filter' relevance = 'relevance_filter' + task_validator = 'task_validator_filter' @dataclass(frozen=True) diff --git a/openvalidators/reward/task_validator.py b/openvalidators/reward/task_validator.py new file mode 100644 index 0000000..c22ac4a --- /dev/null +++ b/openvalidators/reward/task_validator.py @@ -0,0 +1,59 @@ +# The MIT License (MIT) +# Copyright © 2021 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import torch +from typing import List +from .config import RewardModelType +from .reward import BaseRewardModel + + +class TaskValidator( BaseRewardModel ): + + @property + def name(self) -> str: return RewardModelType.task_validator.value + + def __init__(self): + super().__init__() + + def reward( self, prompt: str, completion: str, name: str ) -> float: + answer_keywords = ['Answer:'] + question_keywords = ['Question:'] + summary_keywords = ['Summary:'] + + completion_contains_answer = any(answer_keyword in completion for answer_keyword in answer_keywords) + completion_contains_question = any(question_keyword in completion for question_keyword in question_keywords) + completion_contains_summary = any(summary_keyword in completion for summary_keyword in summary_keywords) + + if name.startswith('followup') and completion_contains_answer: + return 0.0 + + if name.startswith('answer') and completion_contains_question: + return 0.0 + + if name != 'augment' and completion_contains_summary: + return 0.0 + + return 1 + + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32) + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + return rewards + + def reset(self): + pass + From 7dfabc411e9f36e93e85310fa90f3f8708ddeaeb Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:34:26 -0400 Subject: [PATCH 22/44] adds unit tests for task validator --- tests/reward/__init__.py | 0 tests/reward/test_task_validator.py | 83 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/reward/__init__.py create mode 100644 tests/reward/test_task_validator.py diff --git a/tests/reward/__init__.py b/tests/reward/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/reward/test_task_validator.py b/tests/reward/test_task_validator.py new file mode 100644 index 0000000..cc13a0f --- /dev/null +++ b/tests/reward/test_task_validator.py @@ -0,0 +1,83 @@ +import unittest +from openvalidators.reward.task_validator import TaskValidator + +class TaskValidatorTestCase(unittest.TestCase): + """ + This class contains unit tests for the TaskValidator class. + + The tests cover different scenarios for the `reward` method of the TaskValidator class. + The `reward` method is expected to return a reward based on the task name and the completion text. + """ + + def setUp(self): + self.validator = TaskValidator() + + def test_followup_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'followup' (question generation) + and the completion contains the 'Answer:' keyword. + """ + for i in range(0, 4): + name = f'followup{i}' + completion = 'Question: This is a test question?\nAnswer: This is a test answer.' + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_answer_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is 'answer' (answer generation) + and the completion contains the 'Question:' keyword. + """ + for i in range(0, 4): + name = f'answer{i}' + completion = 'Question: This is a test question?\nAnswer: This is a test answer.' + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_followup_and_answer_with_summary_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is different from "augment" (summarization) + and the completion contains the 'Summary:' keyword. + """ + for name in ['followup0', 'followup1', 'followup2', 'followup3', 'answer0', 'answer1', 'answer2', 'answer3']: + completion = 'Summary: This is a test summary.' + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_reward_valid_followup(self): + """ + Test if the reward method returns 1 when the task "name" starts with 'followup' (question generation) + and the completion contains a question + """ + for i in range(0, 4): + name = f'followup{i}' + completion = 'Question: This is a test question?' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_answer(self): + """ + Test if the reward method returns 1 when the task "name" is 'answer' (answer generation) + and the completion contains an answer + """ + for i in range(0, 4): + name = f'answer{i}' + completion = 'Answer: This is a test answer.' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_augment(self): + """ + Test if the reward method returns 1 when the task "name" is 'augment' (summarization) + and the completion contains the a summary. + """ + name = 'augment' + completion = 'Summary: This is a test summary.' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_other(self): + """ + Test if the reward method returns 1 when the task "name" is different from "augment", "followup", and "answer" + and the completion does not contain the 'Summary:', 'Answer:', and 'Question:' keywords. + """ + for name in ['followup0', 'followup1', 'followup2', 'followup3', 'answer0', 'answer1', 'answer2', 'answer3']: + completion = 'This is a test completion.' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From dbb4760636aa5fda1b5c050bcca0e0940ddd89de Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:34:43 -0400 Subject: [PATCH 23/44] updates test_event with task validator --- tests/test_event.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_event.py b/tests/test_event.py index 3ad566f..7fb9f2b 100644 --- a/tests/test_event.py +++ b/tests/test_event.py @@ -45,6 +45,7 @@ def test_event_from_dict_all_forward_columns_match(self): RewardModelType.rlhf.value: [1.0], RewardModelType.prompt.value: [1.0], RewardModelType.relevance.value: [1.0], + RewardModelType.task_validator.value: [1.0] } # Act @@ -102,6 +103,7 @@ def test_event_from_dict_forward_no_reward_logging(self): assert event.rlhf_reward_model is None assert event.prompt_reward_model is None assert event.relevance_filter is None + assert event.task_validator_filter is None def test_event_from_dict_forward_reward_logging_mismatch(self): """Test that all default columns logged on the forward pass are correctly converted and that @@ -142,4 +144,5 @@ def test_event_from_dict_forward_reward_logging_mismatch(self): assert event.rlhf_reward_model is None assert event.prompt_reward_model is None assert event.relevance_filter is None + assert event.task_validator_filter is None From 746ca8d47d18b8035f5ac35c7e690ebdca943ae1 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 15:27:58 -0400 Subject: [PATCH 24/44] removes comma from masking model initialization --- openvalidators/neuron.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index cbd61cb..f1cc385 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -188,19 +188,19 @@ def __init__(self): ) task_validator = ( TaskValidator() if not self.config.neuron.task_validator_off - else MockRewardModel(RewardModelType.task_validator.value), + else MockRewardModel(RewardModelType.task_validator.value) ) relevance_model = ( RelevanceRewardModel(device=self.device) if not self.config.neuron.relevance_off - else MockRewardModel(RewardModelType.relevance.value), + else MockRewardModel(RewardModelType.relevance.value) ) diversity_model = ( DiversityRewardModel(device=self.device) if not self.config.neuron.diversity_off - else MockRewardModel(RewardModelType.diversity.value), + else MockRewardModel(RewardModelType.diversity.value) ) nsfw_model = ( NSFWRewardModel(device=self.device) if not self.config.neuron.nsfw_off - else MockRewardModel(RewardModelType.nsfw.value), + else MockRewardModel(RewardModelType.nsfw.value) ) self.masking_functions = [self.blacklist, task_validator, relevance_model, diversity_model, nsfw_model] From 06dc7518bd73cfb566f573ec5563003a5b6ff4ae Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 15:45:13 -0400 Subject: [PATCH 25/44] organize keyword order --- openvalidators/reward/task_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/reward/task_validator.py b/openvalidators/reward/task_validator.py index c22ac4a..88a101c 100644 --- a/openvalidators/reward/task_validator.py +++ b/openvalidators/reward/task_validator.py @@ -29,9 +29,9 @@ def __init__(self): super().__init__() def reward( self, prompt: str, completion: str, name: str ) -> float: - answer_keywords = ['Answer:'] - question_keywords = ['Question:'] summary_keywords = ['Summary:'] + question_keywords = ['Question:'] + answer_keywords = ['Answer:'] completion_contains_answer = any(answer_keyword in completion for answer_keyword in answer_keywords) completion_contains_question = any(question_keyword in completion for question_keyword in question_keywords) From 327aa9d417f1f2e0f3806031b13b42ea97ce78c0 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 16:21:11 -0400 Subject: [PATCH 26/44] complement prompt --- openvalidators/prompts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/openvalidators/prompts.py b/openvalidators/prompts.py index 61e59bf..8cecc9b 100644 --- a/openvalidators/prompts.py +++ b/openvalidators/prompts.py @@ -124,7 +124,7 @@ def find_unique_tags(input_text: str): # Request a follow-up question given a preceding context. -followup_request_template = "Ask one relevant and insightful question about the preceding context" +followup_request_template = "Ask a single relevant and insightful question about the preceding context" # Scores a summary on a scale from 0 to 10, given a context. augment_scoring_template = """Score the relevance, succinctness, and quality of a summary given a context. The context is within tags, and the question is within tags. Give a score between 0 and 10 in the tags, where 0 means the summary is irrelevant, and 10 means it's perfectly relevant and a good summary. Include a brief explanation for your score based solely on the context-summary relationship. @@ -348,16 +348,16 @@ def find_unique_tags(input_text: str): def followup_prompt( base_text:str, i:int = 0) -> str: if i == 0: - return f"{base_text}\n\n{followup_request_template}\n" + return f"{base_text}\n\n{followup_request_template}\n. Do not try to return an answer or a summary:" else: - return f"{base_text}\n\n{followup_request_template} and previous questions\n" + return f"{base_text}\n\n{followup_request_template} and previous questions. Do not try to return an answer or a summary:\n" def answer_prompt( base_text:str, followup:str ) -> str: - return f"{base_text}\n Question:{followup}\n Answer the question step by step and explain your thoughts" + return f"{base_text}\n\nQuestion:{followup}\nAnswer the question step by step and explain your thoughts. Do not include questions or summaries in your answer." augment_request_template = "Summarize the preceding context" def augment_prompt( base_text:str ) -> str: random_level = random.randint(4, 8) - return f"{base_text}\n\n{augment_request_template} in {random_level} sentences.\n\n" \ No newline at end of file + return f"{base_text}\n\n{augment_request_template} in {random_level} sentences. Do not try to create questions or answers for your summarization.\n\n" \ No newline at end of file From b2ac365a353c737ceec8bb6e73d6950cb2075959 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Mon, 31 Jul 2023 20:55:32 +0000 Subject: [PATCH 27/44] historic similarity check at range 500 - 1500 --- openvalidators/reward/diversity.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 23e04fd..7e0941e 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -58,7 +58,7 @@ def __init__( self, device: str ): self.reward_quantile = torch.tensor(0.1).to(self.device) self.history_reward_quantile = torch.tensor(0.003).to(self.device) self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_size = 1000 + self.history_range = (500, 1500) # store 30 batches of completions. def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": @@ -102,23 +102,19 @@ def unique(embeddings): embeddings_unique = unique(embeddings) historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) - self.historic_embeddings = historic_embeddings[-self.history_size:, :] + self.historic_embeddings = historic_embeddings[-self.history_range[1]:, :] def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: def regularise( rewards ): - # sigmoid function that does the following mapping approximately - # 0.1 -> 0.05 - # 0.15 -> 0.4 - # 0.2 -> 0.9 - # >0.3 -> 1 - return 1/(1 + torch.exp(-50 * rewards + 8)) + # sigmoid function that cutoff at 0.05 approximately + return 1/(1 + torch.exp(-1000 * rewards + 50)) # Return None if history size is too small - if self.historic_embeddings.shape[0] < self.history_size: + if self.historic_embeddings.shape[0] < self.history_range[1]: return None # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings ) + similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) # Reward to be at the 10% quantile of the 1 - similarity score. rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) From 33dfde6c80779a027feae71676cd37d2b25caaf7 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 17:34:18 -0400 Subject: [PATCH 28/44] adds extra verification for topic shifting on augment use cases --- openvalidators/reward/task_validator.py | 10 +++++++--- tests/reward/test_task_validator.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/openvalidators/reward/task_validator.py b/openvalidators/reward/task_validator.py index 88a101c..1eacc2b 100644 --- a/openvalidators/reward/task_validator.py +++ b/openvalidators/reward/task_validator.py @@ -37,13 +37,17 @@ def reward( self, prompt: str, completion: str, name: str ) -> float: completion_contains_question = any(question_keyword in completion for question_keyword in question_keywords) completion_contains_summary = any(summary_keyword in completion for summary_keyword in summary_keywords) - if name.startswith('followup') and completion_contains_answer: + is_summarization_prompt = name == 'augment' + is_question_prompt = name.startswith('followup') + is_answer_prompt = name.startswith('answer') + + if (is_summarization_prompt or is_question_prompt) and completion_contains_answer: return 0.0 - if name.startswith('answer') and completion_contains_question: + if (is_summarization_prompt or is_answer_prompt) and completion_contains_question: return 0.0 - if name != 'augment' and completion_contains_summary: + if not is_summarization_prompt and completion_contains_summary: return 0.0 return 1 diff --git a/tests/reward/test_task_validator.py b/tests/reward/test_task_validator.py index cc13a0f..8ebefc7 100644 --- a/tests/reward/test_task_validator.py +++ b/tests/reward/test_task_validator.py @@ -12,6 +12,15 @@ class TaskValidatorTestCase(unittest.TestCase): def setUp(self): self.validator = TaskValidator() + def test_augment_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Answer:' keyword. + """ + name = f'augment' + completion = "Summary: test summary\nAnswer: Test answer" + self.assertEqual(self.validator.reward('', completion, name), 0.0) + def test_followup_with_answer_keyword(self): """ Test if the reward method returns 0 when the task "name" starts with 'followup' (question generation) @@ -22,6 +31,15 @@ def test_followup_with_answer_keyword(self): completion = 'Question: This is a test question?\nAnswer: This is a test answer.' self.assertEqual(self.validator.reward('', completion, name), 0.0) + def test_augment_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Question:' keyword. + """ + name = f'augment' + completion = "Summary: test summary\nQuestion: This is a test question?" + self.assertEqual(self.validator.reward('', completion, name), 0.0) + def test_answer_with_question_keyword(self): """ Test if the reward method returns 0 when the task "name" is 'answer' (answer generation) From 134769c601fa8584f0b32f4cf63579aecad203a4 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Mon, 31 Jul 2023 17:35:51 -0400 Subject: [PATCH 29/44] sets task validator to be case insensitive --- openvalidators/reward/task_validator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openvalidators/reward/task_validator.py b/openvalidators/reward/task_validator.py index 1eacc2b..6b7a1af 100644 --- a/openvalidators/reward/task_validator.py +++ b/openvalidators/reward/task_validator.py @@ -33,9 +33,9 @@ def reward( self, prompt: str, completion: str, name: str ) -> float: question_keywords = ['Question:'] answer_keywords = ['Answer:'] - completion_contains_answer = any(answer_keyword in completion for answer_keyword in answer_keywords) - completion_contains_question = any(question_keyword in completion for question_keyword in question_keywords) - completion_contains_summary = any(summary_keyword in completion for summary_keyword in summary_keywords) + completion_contains_answer = any(answer_keyword.lower() in completion.lower() for answer_keyword in answer_keywords) + completion_contains_question = any(question_keyword.lower() in completion.lower() for question_keyword in question_keywords) + completion_contains_summary = any(summary_keyword.lower() in completion.lower() for summary_keyword in summary_keywords) is_summarization_prompt = name == 'augment' is_question_prompt = name.startswith('followup') From 7062e277e4e39c8cf63f5746689ed2b8c8c5d246 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Tue, 1 Aug 2023 12:17:43 -0400 Subject: [PATCH 30/44] adds hotfix for empty entries in dataset + tests --- openvalidators/dataset.py | 16 +++++++++++----- tests/test_dataset.py | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 tests/test_dataset.py diff --git a/openvalidators/dataset.py b/openvalidators/dataset.py index 215f10f..96683c4 100644 --- a/openvalidators/dataset.py +++ b/openvalidators/dataset.py @@ -27,11 +27,17 @@ def __init__(self): self.openwebtext = iter( load_dataset("openwebtext", split="train", streaming=True).shuffle(seed=seed, buffer_size=10000) ) self.red_pajama = iter( load_dataset("togethercomputer/RedPajama-Data-1T", 'default', split='train', streaming=True).shuffle(seed=seed, buffer_size=10000) ) - def __next__(self): - if random.random() < 0.5: - return {"text": next(self.openwebtext)["text"]} - else: - return {"text": next(self.red_pajama)["text"]} + def __next__(self): + while True: + bt.logging.debug('Retrieving data from dataset...') + if random.random() < 0.5: + text = next(self.openwebtext)["text"] + else: + text = next(self.red_pajama)["text"] + + # Check if the text is not empty or does not consist only of newline characters + if text.strip(): + return {"text": text} class MockDataset(Iterator): diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 0000000..c220768 --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,26 @@ +import unittest +from openvalidators.dataset import Dataset + + +class DatasetTestCase(unittest.TestCase): + def test_next_skips_empty_and_newline_only_strings(self): + mock_data = iter([{"text": ""}, {"text": "\n\n"}, {"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ skips empty texts and texts that consist only of newline characters + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + def test_next_returns_regular_strings(self): + mock_data = iter([{"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ returns a non-empty text + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From c10553331cae96272ebe7a295935d7c25cc01183 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 17:23:49 +0000 Subject: [PATCH 31/44] longer history; removed normalization; setting bottom_k = 5 --- openvalidators/reward/diversity.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 7e0941e..fcffd7e 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -56,10 +56,9 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) - self.history_reward_quantile = torch.tensor(0.003).to(self.device) + self.history_reward_bottom_k = 5 self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_range = (500, 1500) # store 30 batches of completions. - + self.history_range = (500, 5500) def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -110,14 +109,14 @@ def regularise( rewards ): return 1/(1 + torch.exp(-1000 * rewards + 50)) # Return None if history size is too small - if self.historic_embeddings.shape[0] < self.history_range[1]: + if self.historic_embeddings.shape[0] < self.history_range[0]: return None # Calculate the pairwise cosine similarity. similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) + rewards = torch.topk((1 - similarity), self.history_reward_bottom_k, largest = False)[0][:, -1] return regularise(rewards) @@ -151,4 +150,10 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch if historic_rewards != None: return batch_rewards * historic_rewards else: - return batch_rewards \ No newline at end of file + return batch_rewards + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + # def regularise( rewards ): + # return 1/(1 + torch.exp(-100 * rewards + 12)) + # return regularise(rewards) + return rewards \ No newline at end of file From d861073beb52bbc36958be2016b2d56338b5a11e Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:45:24 -0400 Subject: [PATCH 32/44] remove pip reference from README --- README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7863776..581c1a8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ # **Open Validators** [![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor) -[![PyPI version](https://badge.fury.io/py/openvalidators.svg)](https://badge.fury.io/py/openvalidators) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) --- @@ -19,7 +18,7 @@ It offers several functionalities, such as: The main goal of this repository is to facilitate the interaction with the Bittensor network by providing a set of open-source validators to the community. The current validator implementation queries the network for responses and -evaluations using carefully crafted prompts, that are later evaluated by a large foundation GPT-J reward model. +evaluations using carefully crafted prompts using CoT, that are later evaluated by a pipeline of reward functions, including diversity, relevance, rlhf, among others. Additionally, the repository provides an analysis and data toolkit that allows users to analyze the data generated from the validator's interaction with the network. By default, the validator collects various data points, such as question @@ -69,14 +68,7 @@ There are currently four main avenues for engaging with this repository: - Serves individuals, researchers, and developers who seek to create datasets for the community's miners. # Install -There are two ways to use OpenTensor validators: - -1. With pip: -```bash -$ pip3 install openvalidators -``` - -2. From source: +From source: ```bash $ git clone https://github.com/opentensor/validators.git $ pip3 install -e openvalidators/ From de8d1ed6fbf2284ff0c94c82873c97e5a7f45e87 Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 1 Aug 2023 14:46:36 -0400 Subject: [PATCH 33/44] backwards compat --- openvalidators/neuron.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index e6c7962..d905d4f 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -88,11 +88,9 @@ def __init__(self): self.wallet = bt.wallet(config=self.config) self.wallet.create_if_non_existent() if not self.config.wallet._mock: - bt.utils.reregister( - wallet = self.wallet, - subtensor=self.subtensor, - netuid=self.config.netuid - ) + if not self.subtensor.is_hotkey_registered_on_subnet(hotkey_ss58=self.wallet.hotkey.ss58_address, netuid=self.config.netuid): + raise Exception(f'Wallet not currently registered on netuid {self.config.netuid}, please first register wallet before running') + bt.logging.debug(str(self.wallet)) # Init metagraph. From ac309a9f14c2862168f270e4d42aa1ecc6351417 Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 1 Aug 2023 14:52:44 -0400 Subject: [PATCH 34/44] requirements changes --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 23c892f..2be3e2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -bittensor>=5.3.2,<6.0.0 +bittensor>=5.2.0,<6.0.0 transformers<=4.28.0 wandb==0.15.3 datasets==2.12.0 From 068e53ccfa56c40e0448365f28eb7c2ea5342692 Mon Sep 17 00:00:00 2001 From: Cameron Fairchild Date: Tue, 1 Aug 2023 16:17:04 -0400 Subject: [PATCH 35/44] Update requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2be3e2c..ae2e275 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,3 @@ click==8.1.3 torchmetrics sentencepiece numpy==1.21.6 -bittensor-wallet>=0.0.4,<1.0.0 From 492cfa383e78a95251e48660ac77d05e42e8336c Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 1 Aug 2023 15:07:18 -0400 Subject: [PATCH 36/44] dataset requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ae2e275..2639946 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ bittensor>=5.2.0,<6.0.0 transformers<=4.28.0 wandb==0.15.3 -datasets==2.12.0 +datasets>=2.12.0, < 2.14.0 plotly==5.14.1 networkx==3.1 scipy==1.10.1 From c4846ef152a233bb95142b36f8e6429775e3c78f Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 1 Aug 2023 15:07:53 -0400 Subject: [PATCH 37/44] = --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2639946..8f47125 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ bittensor>=5.2.0,<6.0.0 transformers<=4.28.0 wandb==0.15.3 -datasets>=2.12.0, < 2.14.0 +datasets>=2.12.0, <= 2.14.0 plotly==5.14.1 networkx==3.1 scipy==1.10.1 From 483b9d49ba1d35418c93e48cc7ebb95fa72227bf Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Tue, 1 Aug 2023 15:08:34 -0400 Subject: [PATCH 38/44] adds changelog to repo --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..83683b6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +## 1.1.3 / 2023-08-? + +### What’s Changed + +- Adds subtensor to metagraph sync by @camfairchild in #79 +- Fix wandb weights format logging by @p-ferreira in #88 +- Adds netuid tag to wandb runs by @p-ferreira in #95 +- Implements GPU cleaning for optmization by @Eugene-hu in #96 + +(NOT MERGED YET, DELETE THIS LINE ONCE CHANGES ARE ON STAGING) +- Adds compatibility with bittensor 5.3.3 by @camfairchild in #107 +- Adds historic diversity component by @isabella618033 in #111 +- Improvements on diveristy model by @isabella618033 and @Eugene-hu in #111 +- Prompt improvements by @mrseeker in #110 and @p-ferreira in #112 +- Adds Task Validator Filter to reward pipeline by @p-ferreira in #112 +- Fix for empty data retrieval from datasets by @p-ferreira in #113 +- Deprecates pip usage by @p-ferreira in #114 From ecc7d5ce15bd65b02ce4cb93ec024084a00013ff Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 20:15:14 +0000 Subject: [PATCH 39/44] keep the original regularization --- openvalidators/reward/diversity.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index fcffd7e..8b24ab1 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -58,7 +58,7 @@ def __init__( self, device: str ): self.reward_quantile = torch.tensor(0.1).to(self.device) self.history_reward_bottom_k = 5 self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_range = (500, 5500) + self.history_range = (500, 15500) def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -130,7 +130,6 @@ def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTenso return rewards def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device) @@ -150,10 +149,4 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch if historic_rewards != None: return batch_rewards * historic_rewards else: - return batch_rewards - - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - # def regularise( rewards ): - # return 1/(1 + torch.exp(-100 * rewards + 12)) - # return regularise(rewards) - return rewards \ No newline at end of file + return batch_rewards \ No newline at end of file From 30c554d4a01787aeec1365e143afd632df6e0b28 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 21:05:38 +0000 Subject: [PATCH 40/44] removed init.py --- __init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 __init__.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 From d34a88b885f2f2496ba3b79011dc28c046efd449 Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:02:50 -0400 Subject: [PATCH 41/44] updates changelog --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83683b6..5bce7bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,6 @@ - Fix wandb weights format logging by @p-ferreira in #88 - Adds netuid tag to wandb runs by @p-ferreira in #95 - Implements GPU cleaning for optmization by @Eugene-hu in #96 - -(NOT MERGED YET, DELETE THIS LINE ONCE CHANGES ARE ON STAGING) - Adds compatibility with bittensor 5.3.3 by @camfairchild in #107 - Adds historic diversity component by @isabella618033 in #111 - Improvements on diveristy model by @isabella618033 and @Eugene-hu in #111 From b5cac4c54144bf930b5465039eb6ae8708ed8cdf Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 2 Aug 2023 15:28:49 +0000 Subject: [PATCH 42/44] bottom k 5 -> 2 --- openvalidators/reward/diversity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 8b24ab1..f689e13 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -56,7 +56,7 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) - self.history_reward_bottom_k = 5 + self.history_reward_bottom_k = 2 self.historic_embeddings = torch.tensor([]).to(self.device) self.history_range = (500, 15500) From 14e7db4d0b4b953b97e0e19c5c7f604825e3beb6 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 2 Aug 2023 16:18:21 +0000 Subject: [PATCH 43/44] bug fix --- openvalidators/reward/diversity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index f689e13..6ba8024 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -109,13 +109,13 @@ def regularise( rewards ): return 1/(1 + torch.exp(-1000 * rewards + 50)) # Return None if history size is too small - if self.historic_embeddings.shape[0] < self.history_range[0]: + if self.historic_embeddings.shape[0] < (self.history_range[0] + self.history_reward_bottom_k): return None # Calculate the pairwise cosine similarity. similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) - # Reward to be at the 10% quantile of the 1 - similarity score. + # Reward to be at the bottom_k smallest of the 1 - similarity score. rewards = torch.topk((1 - similarity), self.history_reward_bottom_k, largest = False)[0][:, -1] return regularise(rewards) From 96210b3dc32dcaa3f71550e982d260c469b1ef8e Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Wed, 2 Aug 2023 12:37:36 -0400 Subject: [PATCH 44/44] update release date on changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bce7bb..a5c3738 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 1.1.3 / 2023-08-? +## 1.1.3 / 2023-08-02 ### What’s Changed