From d1681989d8abe97d7ebb803289b38161fbf8ae1f Mon Sep 17 00:00:00 2001
From: Ty Potter <tyler@fluxon.com>
Date: Thu, 15 Aug 2024 13:13:14 -0600
Subject: [PATCH 1/2] fix: Use action key to select best scoring action

---
 eppo_client/bandit.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py
index 2d05e82..378beb2 100644
--- a/eppo_client/bandit.py
+++ b/eppo_client/bandit.py
@@ -160,8 +160,12 @@ def weigh_actions(
         self, action_scores, gamma, probability_floor
     ) -> Dict[str, float]:
         number_of_actions = len(action_scores)
-        best_action = max(action_scores, key=action_scores.get)
-        best_score = action_scores[best_action]
+	# Find the max score
+        best_score = max(action_scores.values())
+	# Get all the keys that have the same best score (if there's more than one)
+        best_action_keys = [k for k,v in action_scores.items() if v == best_score]
+	# Get the lowest lexicographically ordered key.
+        best_action = min(best_action_keys)
 
         # adjust probability floor for number of actions to control the sum
         min_probability = probability_floor / number_of_actions

From 41d4c4233e1c8e82fae68293d241b8e17ec15034 Mon Sep 17 00:00:00 2001
From: Ty Potter <tyler@fluxon.com>
Date: Thu, 15 Aug 2024 19:06:25 -0600
Subject: [PATCH 2/2] lint

---
 eppo_client/bandit.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py
index 378beb2..8f9be69 100644
--- a/eppo_client/bandit.py
+++ b/eppo_client/bandit.py
@@ -160,11 +160,11 @@ def weigh_actions(
         self, action_scores, gamma, probability_floor
     ) -> Dict[str, float]:
         number_of_actions = len(action_scores)
-	# Find the max score
+        # Find the max score
         best_score = max(action_scores.values())
-	# Get all the keys that have the same best score (if there's more than one)
-        best_action_keys = [k for k,v in action_scores.items() if v == best_score]
-	# Get the lowest lexicographically ordered key.
+        # Get all the keys that have the same best score (if there's more than one)
+        best_action_keys = [k for k, v in action_scores.items() if v == best_score]
+        # Break ties by picking the lexicographically smallest key.
         best_action = min(best_action_keys)
 
         # adjust probability floor for number of actions to control the sum