From 20b7a3a58576d84e556b78cc3c6c1c0ddf8a079c Mon Sep 17 00:00:00 2001
From: Eugene
Date: Thu, 8 Jun 2023 16:17:21 -0700
Subject: [PATCH 1/2] fixes the moving average scores and scatters

---
 openvalidators/forward.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openvalidators/forward.py b/openvalidators/forward.py
index 03aa2b4..bb0ddf6 100644
--- a/openvalidators/forward.py
+++ b/openvalidators/forward.py
@@ -341,12 +341,12 @@ async def forward(self):
 
     # Compute forward pass rewards.
     scattered_followup_rewards = (
-        torch.zeros((self.metagraph.n), dtype=torch.float32).to(self.device).scatter(0, followup_uids, followup_rewards)
+        self.moving_averaged_scores.scatter(0, followup_uids, followup_rewards)
     )
     scattered_answer_rewards = (
-        torch.zeros((self.metagraph.n), dtype=torch.float32).to(self.device).scatter(0, answer_uids, answer_rewards)
+        self.moving_averaged_scores.scatter(0, answer_uids, answer_rewards)
     )
-    rewards = scattered_followup_rewards + scattered_answer_rewards
+    rewards = (scattered_followup_rewards + scattered_answer_rewards)/2
     self.moving_averaged_scores = self.config.neuron.moving_average_alpha * rewards.to(self.device) + (
         1 - self.config.neuron.moving_average_alpha
     ) * self.moving_averaged_scores.to(self.device)

From 9ad044abc563493e3c3e0cd854f97c180d7cf393 Mon Sep 17 00:00:00 2001
From: opentaco
Date: Fri, 9 Jun 2023 10:39:05 +0200
Subject: [PATCH 2/2] Combine followup and answer rewards

---
 openvalidators/forward.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/openvalidators/forward.py b/openvalidators/forward.py
index bb0ddf6..6cc11d6 100644
--- a/openvalidators/forward.py
+++ b/openvalidators/forward.py
@@ -339,14 +339,11 @@ async def forward(self):
         rewards=answer_rewards,
     )
 
-    # Compute forward pass rewards.
-    scattered_followup_rewards = (
-        self.moving_averaged_scores.scatter(0, followup_uids, followup_rewards)
-    )
-    scattered_answer_rewards = (
-        self.moving_averaged_scores.scatter(0, answer_uids, answer_rewards)
-    )
-    rewards = (scattered_followup_rewards + scattered_answer_rewards)/2
+    # Compute forward pass rewards, assumes followup_uids and answer_uids are mutually exclusive.
+    rewards = self.moving_averaged_scores.scatter(0, followup_uids, followup_rewards)
+    rewards = rewards.scatter(0, answer_uids, answer_rewards)
+
+    # Update moving_averaged_scores with rewards.
     self.moving_averaged_scores = self.config.neuron.moving_average_alpha * rewards.to(self.device) + (
         1 - self.config.neuron.moving_average_alpha
     ) * self.moving_averaged_scores.to(self.device)