From 140c947a5901e92928c6ddf4497603693f153430 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 14:07:45 -0700 Subject: [PATCH 01/11] self device --- openvalidators/reward/reward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 537d94b..6a5b968 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -76,7 +76,7 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: self.count = min(self.count_limit, self.count + new_count) # Standardize the rewards using the updated mean and variance. - rewards = rewards - self.mean + rewards = rewards - self.mean.to(self.device) if self.var > 0: rewards /= torch.sqrt(self.var) # Scale the standardized rewards to the range [0, 1] using the error function as a cumulative distribution function (CDF). From 3b7b500266b5c770f36da8128fad45afe9f2d935 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 14:13:25 -0700 Subject: [PATCH 02/11] cpu and detached --- openvalidators/reward/reward.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 6a5b968..f54eaef 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -52,6 +52,8 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - It standardizes the reward values using the updated mean and variance. - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. """ + rewards = rewards.detach().cpu() + # Get the number of rewards (successful responses). new_count = rewards.numel() @@ -76,7 +78,7 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: self.count = min(self.count_limit, self.count + new_count) # Standardize the rewards using the updated mean and variance. 
- rewards = rewards - self.mean.to(self.device) + rewards = rewards - self.mean if self.var > 0: rewards /= torch.sqrt(self.var) # Scale the standardized rewards to the range [0, 1] using the error function as a cumulative distribution function (CDF). From 60194b0dfcc2995c39419394e09ce2bb3713d63a Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 14:29:31 -0700 Subject: [PATCH 03/11] cpu --- openvalidators/forward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 9fa8397..a05bd09 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -117,7 +117,7 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude "block": ttl_get_block(self), "step_length": time.time() - start_time, "prompt": prompt, - "uids": uids.tolist(), + "uids": uids.cpu().tolist(), "completions": completions, "completion_times": completion_times, "rewards": rewards.tolist(), From affc74322d83e29636c3564d487fe3d100541094 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 14:52:05 -0700 Subject: [PATCH 04/11] set trace --- openvalidators/forward.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index a05bd09..3d32787 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -110,14 +110,15 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude self.moving_averaged_scores: torch.FloatTensor = alpha * scattered_rewards + (1 - alpha) * self.moving_averaged_scores.to( self.device ) - + import pdb + pdb.set_trace() # Log the step event. 
event.update( { "block": ttl_get_block(self), "step_length": time.time() - start_time, "prompt": prompt, - "uids": uids.cpu().tolist(), + "uids": uids.tolist(), "completions": completions, "completion_times": completion_times, "rewards": rewards.tolist(), From 733c13f5d6bd1ea0762ee096cae3478ce412cabe Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 15:11:17 -0700 Subject: [PATCH 05/11] remove pdb --- openvalidators/forward.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 3d32787..9fa8397 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -110,8 +110,7 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude self.moving_averaged_scores: torch.FloatTensor = alpha * scattered_rewards + (1 - alpha) * self.moving_averaged_scores.to( self.device ) - import pdb - pdb.set_trace() + # Log the step event. event.update( { From 6ea89870c113f12763fa5df07a209a85186a3c3b Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 16:20:57 -0700 Subject: [PATCH 06/11] num_uids --- openvalidators/gating.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/gating.py b/openvalidators/gating.py index 482ad0e..907e903 100644 --- a/openvalidators/gating.py +++ b/openvalidators/gating.py @@ -137,7 +137,7 @@ def __init__( config = GatingModel.config() if model_name is not None: config.gating.model_name = model_name - config.gating.num_uids = num_uids if num_uids is not None else metagraph.n + config.gating.num_uids = num_uids if num_uids is not None else config.gating.num_uids self.config = config self.num_uids = config.gating.num_uids self.device = torch.device(self.config.neuron.device) From 68a756b9c1ef1ba3c9b1f53d991ae3b0e4dcc2d5 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 17:04:13 -0700 Subject: [PATCH 07/11] rewards --- openvalidators/reward/reward.py | 4 +--- 1 file changed, 1 insertion(+), 3 
deletions(-) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index f54eaef..4400e7a 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -51,9 +51,7 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - This function uses Welford's online algorithm to update the mean and variance. - It standardizes the reward values using the updated mean and variance. - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. - """ - rewards = rewards.detach().cpu() - + """ # Get the number of rewards (successful responses). new_count = rewards.numel() From e2be6e01287f5a2719cbf07d18ab04be85317d07 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jul 2023 17:16:37 -0700 Subject: [PATCH 08/11] detach rewards --- openvalidators/reward/reward.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 4400e7a..54ef90d 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -52,6 +52,8 @@ def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - It standardizes the reward values using the updated mean and variance. - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. """ + rewards = rewards.detach().cpu() + # Get the number of rewards (successful responses). new_count = rewards.numel() @@ -88,6 +90,7 @@ def apply( self, prompt: str, responses: List[ bt.DendriteCall ], name: str) -> """ Applies the reward model across each call. Unsuccessful responses are zeroed. """ # Get indices of correctly responding calls. + successful_completions_indices: List[int] = [ idx for idx, resp in enumerate(responses) if resp.is_success ] # Get all completions from responding calls. 
From 92867ed54a0c7fbc4247eff04a7c0f31feb7e985 Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 11 Jul 2023 10:19:05 -0700 Subject: [PATCH 09/11] check if uids have enough for query --- openvalidators/forward.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 9fa8397..83faa51 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -42,15 +42,24 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor If `k` is larger than the number of available `uids`, set `k` to the number of available `uids`. """ candidate_uids = [] + avail_uids = [] for uid in range(self.metagraph.n.item()): uid_is_available = check_uid_availability(self.metagraph, uid, self.config.neuron.vpermit_tao_limit) uid_is_not_excluded = exclude is None or uid not in exclude - if uid_is_available and uid_is_not_excluded: - candidate_uids.append(uid) + if uid_is_available: + avail_uids.append(uid) + if uid_is_not_excluded: + candidate_uids.append(uid) + + # Check if candidate_uids contain enough for querying, if not grab all available uids + if len(candidate_uids) > k: + available_uids = torch.tensor(candidate_uids, dtype=torch.int64).to(self.device) + else: + available_uids = torch.tensor(avail_uids, dtype=torch.int64).to(self.device) + - available_uids = torch.tensor(candidate_uids, dtype=torch.int64).to(self.device) uids = torch.tensor(random.sample(available_uids.tolist(), k), dtype=torch.int64) return uids From 010f178151369b0d08f10b79b444b2fa1255a18c Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 13 Jul 2023 08:59:00 -0700 Subject: [PATCH 10/11] remove device error --- openvalidators/reward/reward.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 54ef90d..570b7bf 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -52,8 +52,6 @@ def normalize_rewards( 
self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - It standardizes the reward values using the updated mean and variance. - It then scales the standardized values to the 0-1 range using the error function (erf) as a CDF. """ - rewards = rewards.detach().cpu() - # Get the number of rewards (successful responses). new_count = rewards.numel() From 7591dae4033a2544471faea120cbfb6a179ad1cb Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Thu, 13 Jul 2023 12:10:23 -0400 Subject: [PATCH 11/11] replicates config.gating.num_uids to sentence embed gating model --- openvalidators/gating.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvalidators/gating.py b/openvalidators/gating.py index 907e903..193e348 100644 --- a/openvalidators/gating.py +++ b/openvalidators/gating.py @@ -228,7 +228,7 @@ def __init__( config = SentenceEmbedGatingModel.config() if model_name is not None: config.gating.model_name = model_name - config.gating.num_uids = num_uids if num_uids is not None else metagraph.n + config.gating.num_uids = num_uids if num_uids is not None else config.gating.num_uids self.config = config self.num_uids = config.gating.num_uids self.device = torch.device(self.config.neuron.device)