From 288d76f1b312852959c28890fd8104ebf498ca54 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Fri, 28 Jul 2023 15:55:50 +0000 Subject: [PATCH 1/6] added historic diversity --- __init__.py | 0 openvalidators/reward/diversity.py | 60 +++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 185c53b..cd0f719 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -56,6 +56,10 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) + self.history_reward_quantile = torch.tensor(0.003).to(self.device) + self.historic_embeddings = torch.tensor([]).to(self.device) + self.history_size = 1000 + def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -86,6 +90,50 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) return sentence_embeddings + def update_historic_embeddings( self, embeddings: torch.FloatTensor ): + def unique(embeddings): + unique_embeddings = [embeddings[0]] + last_emb = embeddings[0] + for emb in embeddings: + if not torch.all(torch.eq(emb, last_emb)): + unique_embeddings.append(emb) + last_emb = emb + return torch.stack(unique_embeddings) + + embeddings_unique = unique(embeddings) + historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) + self.historic_embeddings = historic_embeddings[-self.history_size:, :] + + def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + def regularise( rewards ): + # sigmoid function that does the following mapping approximately + # 0.1 -> 0.05 + # 0.15 -> 0.4 + # 0.2 -> 0.9 + # >0.3 -> 1 + return 1/(1 + torch.exp(-50 * rewards + 8)) + + # Return None if history size is too small + if self.historic_embeddings.shape[0] < self.history_size: + return None + + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings ) + + # Reward to be at the 10% quantile of the 1 - similarity score. + rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) + + return regularise(rewards) + + def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, embeddings ) + + # Reward to be at the 10% quantile of the 1 - similarity score. + rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 ) + + return rewards + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: # Check if completions are empty, return 0 if so @@ -95,11 +143,13 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch # Get embeddings for all completions. embeddings = self.get_embeddings( completions ) - # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, embeddings ) + # Get batch rewards. + batch_rewards = self.get_batch_rewards(embeddings) - # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 ) + # get historic rewards. + historic_rewards = self.get_historic_rewards(embeddings) + self.update_historic_embeddings(embeddings) + # Return all - return rewards \ No newline at end of file + return batch_rewards * historic_rewards \ No newline at end of file From b287c76972dc44f84c9fedafc6aceadae0ba03b3 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Fri, 28 Jul 2023 16:54:18 +0000 Subject: [PATCH 2/6] fix --- openvalidators/reward/diversity.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index cd0f719..23e04fd 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -152,4 +152,7 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch self.update_historic_embeddings(embeddings) # Return all - return batch_rewards * historic_rewards \ No newline at end of file + if historic_rewards != None: + return batch_rewards * historic_rewards + else: + return batch_rewards \ No newline at end of file From b2ac365a353c737ceec8bb6e73d6950cb2075959 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Mon, 31 Jul 2023 20:55:32 +0000 Subject: [PATCH 3/6] historic similarity check at range 500 - 1500 --- openvalidators/reward/diversity.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 23e04fd..7e0941e 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -58,7 +58,7 @@ def __init__( self, device: str ): self.reward_quantile = torch.tensor(0.1).to(self.device) self.history_reward_quantile = torch.tensor(0.003).to(self.device) self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_size = 1000 + self.history_range = (500, 1500) # store 30 batches of completions. def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": @@ -102,23 +102,19 @@ def unique(embeddings): embeddings_unique = unique(embeddings) historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) - self.historic_embeddings = historic_embeddings[-self.history_size:, :] + self.historic_embeddings = historic_embeddings[-self.history_range[1]:, :] def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: def regularise( rewards ): - # sigmoid function that does the following mapping approximately - # 0.1 -> 0.05 - # 0.15 -> 0.4 - # 0.2 -> 0.9 - # >0.3 -> 1 - return 1/(1 + torch.exp(-50 * rewards + 8)) + # sigmoid function that cutoff at 0.05 approximately + return 1/(1 + torch.exp(-1000 * rewards + 50)) # Return None if history size is too small - if self.historic_embeddings.shape[0] < self.history_size: + if self.historic_embeddings.shape[0] < self.history_range[1]: return None # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings ) + similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) # Reward to be at the 10% quantile of the 1 - similarity score. rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) From c10553331cae96272ebe7a295935d7c25cc01183 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 17:23:49 +0000 Subject: [PATCH 4/6] longer history; removed normalization; setting bottom_k = 5 --- openvalidators/reward/diversity.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 7e0941e..fcffd7e 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -56,10 +56,9 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) self.reward_quantile = torch.tensor(0.1).to(self.device) - self.history_reward_quantile = torch.tensor(0.003).to(self.device) + self.history_reward_bottom_k = 5 self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_range = (500, 1500) # store 30 batches of completions. - + self.history_range = (500, 5500) def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -110,14 +109,14 @@ def regularise( rewards ): return 1/(1 + torch.exp(-1000 * rewards + 50)) # Return None if history size is too small - if self.historic_embeddings.shape[0] < self.history_range[1]: + if self.historic_embeddings.shape[0] < self.history_range[0]: return None # Calculate the pairwise cosine similarity. similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 ) + rewards = torch.topk((1 - similarity), self.history_reward_bottom_k, largest = False)[0][:, -1] return regularise(rewards) @@ -151,4 +150,10 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch if historic_rewards != None: return batch_rewards * historic_rewards else: - return batch_rewards \ No newline at end of file + return batch_rewards + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + # def regularise( rewards ): + # return 1/(1 + torch.exp(-100 * rewards + 12)) + # return regularise(rewards) + return rewards \ No newline at end of file From ecc7d5ce15bd65b02ce4cb93ec024084a00013ff Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 20:15:14 +0000 Subject: [PATCH 5/6] keep the original regularization --- openvalidators/reward/diversity.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index fcffd7e..8b24ab1 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -58,7 +58,7 @@ def __init__( self, device: str ): self.reward_quantile = torch.tensor(0.1).to(self.device) self.history_reward_bottom_k = 5 self.historic_embeddings = torch.tensor([]).to(self.device) - self.history_range = (500, 5500) + self.history_range = (500, 15500) def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. @@ -130,7 +130,6 @@ def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTenso return rewards def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device) @@ -150,10 +149,4 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch if historic_rewards != None: return batch_rewards * historic_rewards else: - return batch_rewards - - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - # def regularise( rewards ): - # return 1/(1 + torch.exp(-100 * rewards + 12)) - # return regularise(rewards) - return rewards \ No newline at end of file + return batch_rewards \ No newline at end of file From 30c554d4a01787aeec1365e143afd632df6e0b28 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 1 Aug 2023 21:05:38 +0000 Subject: [PATCH 6/6] removed init.py --- __init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 __init__.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000