From 105773e69e5491634f5ac88aba382b9f240fc5f8 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 3 Aug 2023 12:41:19 -0400 Subject: [PATCH 1/3] relevance updates --- openvalidators/forward.py | 11 +++++++++-- openvalidators/reward/relevance.py | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 376fc8c..1fd5551 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -62,7 +62,11 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor return uids -async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = []): +async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = [], base_prompt = None): + + if base_prompt == None: + base_prompt = prompt + bt.logging.debug("run_step", name) # Record event start time. @@ -90,7 +94,7 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude bt.logging.trace(str(reward_fn_i.name), reward_i.tolist()) for masking_fn_i in self.masking_functions: - mask_i = masking_fn_i.apply(prompt, responses, name).to(self.device) + mask_i = masking_fn_i.apply(base_prompt, responses, name).to(self.device) rewards *= mask_i # includes diversity if not self.config.neuron.disable_log_rewards: event[masking_fn_i.name] = mask_i.tolist() @@ -168,6 +172,7 @@ async def forward(self): ) base_text = augment_event["best"] + base_prompt = augment_event["best"] exclude = augment_event["uids"] for k in range(self.config.neuron.num_followup_steps): @@ -180,6 +185,7 @@ async def forward(self): k=self.config.neuron.followup_sample_size, timeout=self.config.neuron.followup_timeout, exclude=exclude, + base_prompt=base_prompt ) exclude += followup_event["uids"] @@ -192,6 +198,7 @@ async def forward(self): k=self.config.neuron.answer_sample_size, timeout=self.config.neuron.answer_timeout, exclude=exclude, + base_prompt=followup_event["best"] ) exclude += 
answer_event["uids"] diff --git a/openvalidators/reward/relevance.py b/openvalidators/reward/relevance.py index d41d50f..979aaa8 100644 --- a/openvalidators/reward/relevance.py +++ b/openvalidators/reward/relevance.py @@ -61,6 +61,7 @@ def __init__( self, device: str ): self.bounds = [-0.0246, 0.3] def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + print('relevance prompt:',prompt) return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: From 85933bf7eb9e196cb9e1f1809dea23b577fd11e7 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 3 Aug 2023 13:00:21 -0400 Subject: [PATCH 2/3] more logging --- openvalidators/forward.py | 2 ++ openvalidators/reward/relevance.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 1fd5551..64c07db 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -212,3 +212,5 @@ async def forward(self): ) else: base_text = base_text + "\nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"] + + print(base_text) diff --git a/openvalidators/reward/relevance.py b/openvalidators/reward/relevance.py index 979aaa8..2dd2c1d 100644 --- a/openvalidators/reward/relevance.py +++ b/openvalidators/reward/relevance.py @@ -61,7 +61,7 @@ def __init__( self, device: str ): self.bounds = [-0.0246, 0.3] def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - print('relevance prompt:',prompt) + print(f'relevance prompt:{name}',prompt) return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: From 4ff6ebb58d9abe5e8c99752eb6175ab689a0c7db Mon Sep 17 00:00:00 2001 From: 
Eugene Date: Tue, 8 Aug 2023 10:33:51 -0700 Subject: [PATCH 3/3] remove logging prints --- openvalidators/forward.py | 3 +-- openvalidators/reward/relevance.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 64c07db..59589b2 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -212,5 +212,4 @@ async def forward(self): ) else: base_text = base_text + "\nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"] - - print(base_text) + \ No newline at end of file diff --git a/openvalidators/reward/relevance.py b/openvalidators/reward/relevance.py index 2dd2c1d..d41d50f 100644 --- a/openvalidators/reward/relevance.py +++ b/openvalidators/reward/relevance.py @@ -61,7 +61,6 @@ def __init__( self, device: str ): self.bounds = [-0.0246, 0.3] def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - print(f'relevance prompt:{name}',prompt) return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: