From d1681989d8abe97d7ebb803289b38161fbf8ae1f Mon Sep 17 00:00:00 2001 From: Ty Potter Date: Thu, 15 Aug 2024 13:13:14 -0600 Subject: [PATCH 1/2] fix: Use action key to select best scoring action --- eppo_client/bandit.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py index 2d05e82..378beb2 100644 --- a/eppo_client/bandit.py +++ b/eppo_client/bandit.py @@ -160,8 +160,12 @@ def weigh_actions( self, action_scores, gamma, probability_floor ) -> Dict[str, float]: number_of_actions = len(action_scores) - best_action = max(action_scores, key=action_scores.get) - best_score = action_scores[best_action] + # Find the max score + best_score = max(action_scores.values()) + # Get all the keys that have the same best score (if there's more than one) + best_action_keys = [k for k,v in action_scores.items() if v == best_score] + # Get the lowest lexicographically ordered key. + best_action = min(best_action_keys) # adjust probability floor for number of actions to control the sum min_probability = probability_floor / number_of_actions From 41d4c4233e1c8e82fae68293d241b8e17ec15034 Mon Sep 17 00:00:00 2001 From: Ty Potter Date: Thu, 15 Aug 2024 19:06:25 -0600 Subject: [PATCH 2/2] lint --- eppo_client/bandit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py index 378beb2..8f9be69 100644 --- a/eppo_client/bandit.py +++ b/eppo_client/bandit.py @@ -160,11 +160,11 @@ def weigh_actions( self, action_scores, gamma, probability_floor ) -> Dict[str, float]: number_of_actions = len(action_scores) - # Find the max score + # Find the max score best_score = max(action_scores.values()) - # Get all the keys that have the same best score (if there's more than one) - best_action_keys = [k for k,v in action_scores.items() if v == best_score] - # Get the lowest lexicographically ordered key. + # Get all the keys that have the same best score (if there's more than one) + best_action_keys = [k for k, v in action_scores.items() if v == best_score] + # Get the lowest lexicographically ordered key. best_action = min(best_action_keys) # adjust probability floor for number of actions to control the sum