From 288d76f1b312852959c28890fd8104ebf498ca54 Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Fri, 28 Jul 2023 15:55:50 +0000
Subject: [PATCH 1/6] added historic diversity

---
 __init__.py                        |  0
 openvalidators/reward/diversity.py | 60 +++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py
index 185c53b..cd0f719 100644
--- a/openvalidators/reward/diversity.py
+++ b/openvalidators/reward/diversity.py
@@ -56,6 +56,10 @@ def __init__( self, device: str ):
         self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path )
         self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device)
         self.reward_quantile = torch.tensor(0.1).to(self.device)
+        self.history_reward_quantile = torch.tensor(0.003).to(self.device)
+        self.historic_embeddings = torch.tensor([]).to(self.device)
+        self.history_size = 1000
+
         
     def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor":
         """Runs a forward pass through the model.
@@ -86,6 +90,50 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor":
         sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
         return sentence_embeddings
 
+    def update_historic_embeddings( self, embeddings: torch.FloatTensor ):
+        def unique(embeddings):
+            unique_embeddings = [embeddings[0]]
+            last_emb = embeddings[0]
+            for emb in embeddings:
+                if not torch.all(torch.eq(emb, last_emb)):
+                    unique_embeddings.append(emb)
+                last_emb = emb
+            return torch.stack(unique_embeddings)
+ 
+        embeddings_unique = unique(embeddings)
+        historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique])
+        self.historic_embeddings = historic_embeddings[-self.history_size:, :]
+    
+    def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor:
+        def regularise( rewards ):
+            # sigmoid function that does the following mapping approximately
+            # 0.1 -> 0.05 
+            # 0.15 -> 0.4
+            # 0.2 -> 0.9
+            # >0.3 -> 1
+            return 1/(1 + torch.exp(-50 * rewards + 8))
+
+        # Return None if history size is too small 
+        if self.historic_embeddings.shape[0] < self.history_size:
+            return None
+        
+        # Calculate the pairwise cosine similarity.
+        similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings )
+
+        # Reward to be at the 10% quantile of the 1 - similarity score.
+        rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 )
+
+        return regularise(rewards) 
+
+    def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor:
+        # Calculate the pairwise cosine similarity.
+        similarity = pairwise_cosine_similarity( embeddings, embeddings )
+
+        # Reward to be at the 10% quantile of the 1 - similarity score.
+        rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 )
+
+        return rewards 
+    
     def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor:
 
         # Check if completions are empty, return 0 if so
@@ -95,11 +143,13 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch
         # Get embeddings for all completions.
         embeddings = self.get_embeddings( completions )
 
-        # Calculate the pairwise cosine similarity.
-        similarity = pairwise_cosine_similarity( embeddings, embeddings )
+        # Get batch rewards.
+        batch_rewards = self.get_batch_rewards(embeddings)
 
-        # Reward to be at the 10% quantile of the 1 - similarity score.
-        rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 )
+        # get historic rewards.
+        historic_rewards = self.get_historic_rewards(embeddings)
 
+        self.update_historic_embeddings(embeddings)
+        
         # Return all
-        return rewards
\ No newline at end of file
+        return batch_rewards * historic_rewards
\ No newline at end of file

From b287c76972dc44f84c9fedafc6aceadae0ba03b3 Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Fri, 28 Jul 2023 16:54:18 +0000
Subject: [PATCH 2/6] fix: return batch rewards when historic rewards are None

---
 openvalidators/reward/diversity.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py
index cd0f719..23e04fd 100644
--- a/openvalidators/reward/diversity.py
+++ b/openvalidators/reward/diversity.py
@@ -152,4 +152,7 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch
         self.update_historic_embeddings(embeddings)
         
         # Return all
-        return batch_rewards * historic_rewards
\ No newline at end of file
+        if historic_rewards != None:
+            return batch_rewards * historic_rewards
+        else:
+            return batch_rewards
\ No newline at end of file

From b2ac365a353c737ceec8bb6e73d6950cb2075959 Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Mon, 31 Jul 2023 20:55:32 +0000
Subject: [PATCH 3/6] historic similarity check at range 500 - 1500

---
 openvalidators/reward/diversity.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py
index 23e04fd..7e0941e 100644
--- a/openvalidators/reward/diversity.py
+++ b/openvalidators/reward/diversity.py
@@ -58,7 +58,7 @@ def __init__( self, device: str ):
         self.reward_quantile = torch.tensor(0.1).to(self.device)
         self.history_reward_quantile = torch.tensor(0.003).to(self.device)
         self.historic_embeddings = torch.tensor([]).to(self.device)
-        self.history_size = 1000
+        self.history_range = (500, 1500) # store 30 batches of completions.
 
         
     def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor":
@@ -102,23 +102,19 @@ def unique(embeddings):
  
         embeddings_unique = unique(embeddings)
         historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique])
-        self.historic_embeddings = historic_embeddings[-self.history_size:, :]
+        self.historic_embeddings = historic_embeddings[-self.history_range[1]:, :]
     
     def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor:
         def regularise( rewards ):
-            # sigmoid function that does the following mapping approximately
-            # 0.1 -> 0.05 
-            # 0.15 -> 0.4
-            # 0.2 -> 0.9
-            # >0.3 -> 1
-            return 1/(1 + torch.exp(-50 * rewards + 8))
+            # steep sigmoid that cuts off at approximately 0.05 (below -> ~0, above -> ~1)
+            return 1/(1 + torch.exp(-1000 * rewards + 50))
 
         # Return None if history size is too small 
-        if self.historic_embeddings.shape[0] < self.history_size:
+        if self.historic_embeddings.shape[0] < self.history_range[1]:
             return None
         
         # Calculate the pairwise cosine similarity.
-        similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings )
+        similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] )
 
         # Reward to be at the 10% quantile of the 1 - similarity score.
         rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 )

From c10553331cae96272ebe7a295935d7c25cc01183 Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Tue, 1 Aug 2023 17:23:49 +0000
Subject: [PATCH 4/6] longer history;  removed normalization; setting bottom_k
 = 5

---
 openvalidators/reward/diversity.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py
index 7e0941e..fcffd7e 100644
--- a/openvalidators/reward/diversity.py
+++ b/openvalidators/reward/diversity.py
@@ -56,10 +56,9 @@ def __init__( self, device: str ):
         self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path )
         self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device)
         self.reward_quantile = torch.tensor(0.1).to(self.device)
-        self.history_reward_quantile = torch.tensor(0.003).to(self.device)
+        self.history_reward_bottom_k = 5
         self.historic_embeddings = torch.tensor([]).to(self.device)
-        self.history_range = (500, 1500) # store 30 batches of completions.
-
+        self.history_range = (500, 5500)
         
     def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor":
         """Runs a forward pass through the model.
@@ -110,14 +109,14 @@ def regularise( rewards ):
             return 1/(1 + torch.exp(-1000 * rewards + 50))
 
         # Return None if history size is too small 
-        if self.historic_embeddings.shape[0] < self.history_range[1]:
+        if self.historic_embeddings.shape[0] < self.history_range[0]:
             return None
         
         # Calculate the pairwise cosine similarity.
         similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] )
 
         # Reward to be at the 10% quantile of the 1 - similarity score.
-        rewards = (1 - similarity).quantile(self.history_reward_quantile, dim = 1 )
+        rewards = torch.topk((1 - similarity), self.history_reward_bottom_k, largest = False)[0][:, -1]
 
         return regularise(rewards) 
 
@@ -151,4 +150,10 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch
         if historic_rewards != None:
             return batch_rewards * historic_rewards
         else:
-            return batch_rewards
\ No newline at end of file
+            return batch_rewards
+
+    def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor:
+        # def regularise( rewards ):
+        #     return 1/(1 + torch.exp(-100 * rewards + 12))
+        # return regularise(rewards)
+        return rewards
\ No newline at end of file

From ecc7d5ce15bd65b02ce4cb93ec024084a00013ff Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Tue, 1 Aug 2023 20:15:14 +0000
Subject: [PATCH 5/6] keep the original regularization

---
 openvalidators/reward/diversity.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py
index fcffd7e..8b24ab1 100644
--- a/openvalidators/reward/diversity.py
+++ b/openvalidators/reward/diversity.py
@@ -58,7 +58,7 @@ def __init__( self, device: str ):
         self.reward_quantile = torch.tensor(0.1).to(self.device)
         self.history_reward_bottom_k = 5
         self.historic_embeddings = torch.tensor([]).to(self.device)
-        self.history_range = (500, 5500)
+        self.history_range = (500, 15500)
         
     def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor":
         """Runs a forward pass through the model.
@@ -130,7 +130,6 @@ def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTenso
         return rewards 
     
     def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor:
-
         # Check if completions are empty, return 0 if so
         if len(completions) == 0:
             return torch.tensor([]).to(self.device)
@@ -150,10 +149,4 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch
         if historic_rewards != None:
             return batch_rewards * historic_rewards
         else:
-            return batch_rewards
-
-    def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor:
-        # def regularise( rewards ):
-        #     return 1/(1 + torch.exp(-100 * rewards + 12))
-        # return regularise(rewards)
-        return rewards
\ No newline at end of file
+            return batch_rewards
\ No newline at end of file

From 30c554d4a01787aeec1365e143afd632df6e0b28 Mon Sep 17 00:00:00 2001
From: isabella618033 <isabella618033@gmail.com>
Date: Tue, 1 Aug 2023 21:05:38 +0000
Subject: [PATCH 6/6] removed __init__.py

---
 __init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index e69de29..0000000