From 056a7f108d8a921235a94a45d3422e3c7e85393b Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 10:26:55 -0700 Subject: [PATCH 1/6] Switch actions input from list to dict --- eppo_client/bandit.py | 4 ++++ eppo_client/client.py | 13 +++++++++---- test/client_bandit_test.py | 32 +++++++++++++++++--------------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py index ab37a11..7412075 100644 --- a/eppo_client/bandit.py +++ b/eppo_client/bandit.py @@ -23,6 +23,10 @@ class Attributes: numeric_attributes: Dict[str, float] categorical_attributes: Dict[str, str] + @classmethod + def empty(cls): + return cls({}, {}) + @dataclass class ActionContext: diff --git a/eppo_client/client.py b/eppo_client/client.py index e945ed4..25e2886 100644 --- a/eppo_client/client.py +++ b/eppo_client/client.py @@ -226,7 +226,7 @@ def get_bandit_action( flag_key: str, subject_key: str, subject_context: Attributes, - actions_with_contexts: List[ActionContext], + actions: Dict[str, Attributes], default: str, ) -> BanditResult: """ @@ -245,7 +245,8 @@ def get_bandit_action( flag_key (str): The feature flag key that contains the bandit as one of the variations. subject_key (str): The key identifying the subject. subject_context (Attributes): The subject context - actions_with_contexts (List[ActionContext]): The list of actions with their contexts. + actions (Dict[str, Attributes]): The dictionary that maps action keys to their context of actions with their contexts. + default (str): The default variation to use if the subject is not part of the bandit. Returns: BanditResult: The result containing either the bandit action if the subject is part of the bandit, @@ -258,7 +259,7 @@ def get_bandit_action( flag_key, subject_key, subject_context, - actions_with_contexts, + actions, default, ) except Exception as e: @@ -272,7 +273,7 @@ def get_bandit_action_detail( flag_key: str, subject_key: str, subject_context: Attributes, - actions_with_contexts: List[ActionContext], + actions: Dict[str, Attributes], default: str, ) -> BanditResult: # get experiment assignment @@ -294,6 +295,10 @@ def get_bandit_action_detail( ) return BanditResult(variation, None) + actions_with_contexts = [ + ActionContext(action_key, attributes) + for action_key, attributes in actions.items() + ] evaluation = self.__bandit_evaluator.evaluate_bandit( flag_key, subject_key, diff --git a/test/client_bandit_test.py b/test/client_bandit_test.py index 006dd1f..926b2c9 100644 --- a/test/client_bandit_test.py +++ b/test/client_bandit_test.py @@ -108,10 +108,15 @@ def test_get_bandit_action_flag_without_bandit(): def test_get_bandit_action_with_subject_attributes(): # tests that allocation filtering based on subject attributes works correctly client = get_instance() - actions = [ - ActionContext.create("adidas", {"discount": 0.1}, {"from": "germany"}), - ActionContext.create("nike", {"discount": 0.2}, {"from": "usa"}), - ] + actions = { + "adidas": Attributes( + numeric_attributes={"discount": 0.1}, + categorical_attributes={"from": "germany"}, + ), + "nike": Attributes( + numeric_attributes={"discount": 0.2}, categorical_attributes={"from": "usa"} + ), + } result = client.get_bandit_action( "banner_bandit_flag_uk_only", "alice", @@ -145,16 +150,15 @@ def test_get_bandit_action_with_subject_attributes(): assert bandit_log_statement["optimalityGap"] >= 0 assert bandit_log_statement["actionProbability"] >= 0 - chosen_action = next( - action for action in actions if action.action_key == result.action - ) + chosen_action = actions[result.action] + assert ( bandit_log_statement["actionNumericAttributes"] - == chosen_action.attributes.numeric_attributes + == chosen_action.numeric_attributes ) assert ( bandit_log_statement["actionCategoricalAttributes"] - == chosen_action.attributes.categorical_attributes + == chosen_action.categorical_attributes ) @@ -175,14 +179,12 @@ def test_bandit_generic_test_cases(test_case): "categorical_attributes" ], ), - [ - ActionContext.create( - action["actionKey"], - action["numericAttributes"], - action["categoricalAttributes"], + { + action["actionKey"]: Attributes( + action["numericAttributes"], action["categoricalAttributes"] ) for action in subject["actions"] - ], + }, default_value, ) From e90daaec7fdc152c5e5646dca67851ad402695bb Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 10:53:34 -0700 Subject: [PATCH 2/6] refactor --- eppo_client/bandit.py | 119 ++++++++++--------------------------- eppo_client/client.py | 12 ++-- test/bandit_test.py | 54 +++++++---------- test/client_bandit_test.py | 2 +- 4 files changed, 58 insertions(+), 129 deletions(-) diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py index 7412075..c41ead7 100644 --- a/eppo_client/bandit.py +++ b/eppo_client/bandit.py @@ -28,44 +28,7 @@ def empty(cls): return cls({}, {}) -@dataclass -class ActionContext: - action_key: str - attributes: Attributes - - @classmethod - def create( - cls, - action_key: str, - numeric_attributes: Dict[str, float], - categorical_attributes: Dict[str, str], - ): - """ - Create an instance of ActionContext. - - Args: - action_key (str): The key representing the action. - numeric_attributes (Dict[str, float]): A dictionary of numeric attributes. - categorical_attributes (Dict[str, str]): A dictionary of categorical attributes. - - Returns: - ActionContext: An instance of ActionContext with the provided action key and attributes. - """ - return cls( - action_key, - Attributes( - numeric_attributes=numeric_attributes, - categorical_attributes=categorical_attributes, - ), - ) - - @property - def numeric_attributes(self): - return self.attributes.numeric_attributes - - @property - def categorical_attributes(self): - return self.attributes.categorical_attributes +ActionContexts = Dict[str, Attributes] @dataclass @@ -108,19 +71,16 @@ def evaluate_bandit( flag_key: str, subject_key: str, subject_attributes: Attributes, - actions_with_contexts: List[ActionContext], + actions: ActionContexts, bandit_model: BanditModelData, ) -> BanditEvaluation: # handle the edge case that there are no actions - if not actions_with_contexts: + if not actions: return null_evaluation( flag_key, subject_key, subject_attributes, bandit_model.gamma ) - action_scores = self.score_actions( - subject_attributes, actions_with_contexts, bandit_model - ) - + action_scores = self.score_actions(subject_attributes, actions, bandit_model) action_weights = self.weigh_actions( action_scores, bandit_model.gamma, @@ -128,24 +88,16 @@ def evaluate_bandit( ) selected_action = self.select_action(flag_key, subject_key, action_weights) - selected_idx = next( - idx - for idx, action_context in enumerate(actions_with_contexts) - if action_context.action_key == selected_action - ) - - optimality_gap = ( - max(score for _, score in action_scores) - action_scores[selected_idx][1] - ) + optimality_gap = max(action_scores.values()) - action_scores[selected_action] return BanditEvaluation( flag_key, subject_key, subject_attributes, selected_action, - actions_with_contexts[selected_idx].attributes, - action_scores[selected_idx][1], - action_weights[selected_idx][1], + actions[selected_action], + action_scores[selected_action], + action_weights[selected_action], bandit_model.gamma, optimality_gap, ) @@ -153,56 +105,51 @@ def evaluate_bandit( def score_actions( self, subject_attributes: Attributes, - actions_with_contexts: List[ActionContext], + actions: ActionContexts, bandit_model: BanditModelData, - ) -> List[Tuple[str, float]]: - return [ - ( - action_context.action_key, - ( - score_action( - subject_attributes, - action_context.attributes, - bandit_model.coefficients[action_context.action_key], - ) - if action_context.action_key in bandit_model.coefficients - else bandit_model.default_action_score - ), + ) -> Dict[str, float]: + return { + action_key: ( + score_action( + subject_attributes, + action_attributes, + bandit_model.coefficients[action_key], + ) + if action_key in bandit_model.coefficients + else bandit_model.default_action_score ) - for action_context in actions_with_contexts - ] + for action_key, action_attributes in actions.items() + } def weigh_actions( self, action_scores, gamma, probability_floor - ) -> List[Tuple[str, float]]: + ) -> Dict[str, float]: number_of_actions = len(action_scores) - best_action, best_score = max(action_scores, key=lambda t: t[1]) + best_action = max(action_scores, key=action_scores.get) + best_score = action_scores[best_action] # adjust probability floor for number of actions to control the sum min_probability = probability_floor / number_of_actions # weight all but the best action - weights = [ - ( - action_key, - max( - min_probability, - 1.0 / (number_of_actions + gamma * (best_score - score)), - ), + weights = { + action_key: max( + min_probability, + 1.0 / (number_of_actions + gamma * (best_score - score)), ) - for action_key, score in action_scores + for action_key, score in action_scores.items() if action_key != best_action - ] + } # remaining weight goes to best action - remaining_weight = max(0.0, 1.0 - sum(weight for _, weight in weights)) - weights.append((best_action, remaining_weight)) + remaining_weight = max(0.0, 1.0 - sum(weights.values())) + weights[best_action] = remaining_weight return weights def select_action(self, flag_key, subject_key, action_weights) -> str: # deterministic ordering sorted_action_weights = sorted( - action_weights, + action_weights.items(), key=lambda t: ( self.sharder.get_shard( f"{flag_key}-{subject_key}-{t[0]}", self.total_shards diff --git a/eppo_client/client.py b/eppo_client/client.py index 25e2886..9d4eb9d 100644 --- a/eppo_client/client.py +++ b/eppo_client/client.py @@ -3,7 +3,7 @@ import json from typing import Any, Dict, List, Optional from eppo_client.assignment_logger import AssignmentLogger -from eppo_client.bandit import BanditEvaluator, BanditResult, ActionContext, Attributes +from eppo_client.bandit import BanditEvaluator, BanditResult, Attributes, ActionContexts from eppo_client.configuration_requestor import ( ExperimentConfigurationRequestor, ) @@ -226,7 +226,7 @@ def get_bandit_action( flag_key: str, subject_key: str, subject_context: Attributes, - actions: Dict[str, Attributes], + actions: ActionContexts, default: str, ) -> BanditResult: """ @@ -273,7 +273,7 @@ def get_bandit_action_detail( flag_key: str, subject_key: str, subject_context: Attributes, - actions: Dict[str, Attributes], + actions: ActionContexts, default: str, ) -> BanditResult: # get experiment assignment @@ -295,15 +295,11 @@ def get_bandit_action_detail( ) return BanditResult(variation, None) - actions_with_contexts = [ - ActionContext(action_key, attributes) - for action_key, attributes in actions.items() - ] evaluation = self.__bandit_evaluator.evaluate_bandit( flag_key, subject_key, subject_context, - actions_with_contexts, + actions, bandit_data.model_data, ) diff --git a/test/bandit_test.py b/test/bandit_test.py index a69516d..7d901dd 100644 --- a/test/bandit_test.py +++ b/test/bandit_test.py @@ -3,7 +3,6 @@ from eppo_client.sharders import MD5Sharder, DeterministicSharder from eppo_client.bandit import ( - ActionContext, Attributes, score_numeric_attributes, score_categorical_attributes, @@ -162,10 +161,10 @@ def test_score_categorical_attributes_mixed_coefficients(): def test_weigh_actions_single_action(): - action_scores = [("action1", 1.0)] + action_scores = {"action1": 1.0} gamma = 0.1 probability_floor = 0.1 - expected_weights = [("action1", 1.0)] + expected_weights = {"action1": 1.0} assert ( bandit_evaluator.weigh_actions(action_scores, gamma, probability_floor) == expected_weights @@ -173,31 +172,29 @@ def test_weigh_actions_single_action(): def test_weigh_actions_multiple_actions(): - action_scores = [("action1", 1.0), ("action2", 0.5)] + action_scores = {"action1": 1.0, "action2": 0.5} gamma = 10 probability_floor = 0.1 weights = bandit_evaluator.weigh_actions(action_scores, gamma, probability_floor) assert len(weights) == 2 - action_1_weight = next(weight for action, weight in weights if action == "action1") - assert action_1_weight == pytest.approx(6 / 7, rel=1e-6) - action_2_weight = next(weight for action, weight in weights if action == "action2") - assert action_2_weight == pytest.approx(1 / 7, rel=1e-6) + assert weights["action1"] == pytest.approx(6 / 7, rel=1e-6) + assert weights["action2"] == pytest.approx(1 / 7, rel=1e-6) def test_weight_actions_probability_floor(): - action_scores = [("action1", 1.0), ("action2", 0.5), ("action3", 0.2)] + action_scores = {"action1": 1.0, "action2": 0.5, "action3": 0.2} gamma = 10 probability_floor = 0.3 weights = bandit_evaluator.weigh_actions(action_scores, gamma, probability_floor) assert len(weights) == 3 # note probability floor is normalized by number of actions: 0.3/3 = 0.1 - for _, weight in weights: + for weight in weights.values(): assert weight == pytest.approx(0.1, rel=1e-6) or weight > 0.1 def test_weight_actions_gamma_effect(): - action_scores = [("action1", 1.0), ("action2", 0.5)] + action_scores = {"action1": 1.0, "action2": 0.5} small_gamma = 1.0 large_gamma = 10.0 probability_floor = 0.1 @@ -207,22 +204,17 @@ def test_weight_actions_gamma_effect(): weights_large_gamma = bandit_evaluator.weigh_actions( action_scores, large_gamma, probability_floor ) - - assert next( - weight for action, weight in weights_small_gamma if action == "action1" - ) < next(weight for action, weight in weights_large_gamma if action == "action1") - assert next( - weight for action, weight in weights_small_gamma if action == "action2" - ) > next(weight for action, weight in weights_large_gamma if action == "action2") + assert weights_small_gamma["action1"] < weights_large_gamma["action1"] + assert weights_small_gamma["action2"] > weights_large_gamma["action2"] def test_weight_actions_all_equal_scores(): - action_scores = [("action1", 1.0), ("action2", 1.0), ("action3", 1.0)] + action_scores = {"action1": 1.0, "action2": 1.0, "action3": 1.0} gamma = 0.1 probability_floor = 0.1 weights = bandit_evaluator.weigh_actions(action_scores, gamma, probability_floor) assert len(weights) == 3 - for _, weight in weights: + for weight in weights.values(): assert weight == pytest.approx(1.0 / 3, rel=1e-2) @@ -233,22 +225,16 @@ def test_evaluate_bandit(): subject_attributes = Attributes( numeric_attributes={"age": 25.0}, categorical_attributes={"location": "US"} ) - action_contexts = [ - ActionContext( - action_key="action1", - attributes=Attributes( - numeric_attributes={"price": 10.0}, - categorical_attributes={"category": "A"}, - ), + action_contexts = { + "action1": Attributes( + numeric_attributes={"price": 10.0}, + categorical_attributes={"category": "A"}, ), - ActionContext( - action_key="action2", - attributes=Attributes( - numeric_attributes={"price": 20.0}, - categorical_attributes={"category": "B"}, - ), + "action2": Attributes( + numeric_attributes={"price": 20.0}, + categorical_attributes={"category": "B"}, ), - ] + } coefficients = { "action1": BanditCoefficients( action_key="action1", diff --git a/test/client_bandit_test.py b/test/client_bandit_test.py index 926b2c9..c870219 100644 --- a/test/client_bandit_test.py +++ b/test/client_bandit_test.py @@ -6,7 +6,7 @@ import os from time import sleep from typing import Dict, List -from eppo_client.bandit import BanditResult, ActionContext, Attributes +from eppo_client.bandit import BanditResult, Attributes import httpretty # type: ignore import pytest From 021b3aa4f877568f7c7aeb951be0859520790533 Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 10:53:49 -0700 Subject: [PATCH 3/6] version bump --- eppo_client/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eppo_client/version.py b/eppo_client/version.py index 1fe90f6..1173108 100644 --- a/eppo_client/version.py +++ b/eppo_client/version.py @@ -1 +1 @@ -__version__ = "3.1.4" +__version__ = "3.2.0" From 85ee2f254c3ce3ddcddddbc89c98bef7d4cf36be Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 11:00:47 -0700 Subject: [PATCH 4/6] add test for Attributes.empty() --- test/bandit_test.py | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/test/bandit_test.py b/test/bandit_test.py index 7d901dd..d63c76e 100644 --- a/test/bandit_test.py +++ b/test/bandit_test.py @@ -319,3 +319,73 @@ def test_evaluate_bandit(): assert evaluation.gamma == bandit_model.gamma assert evaluation.action_score == 4.0 assert pytest.approx(evaluation.action_weight, rel=1e-2) == 0.4926 + + +def test_bandit_no_action_contexts(): + # Mock data + flag_key = "test_flag" + subject_key = "test_subject" + subject_attributes = Attributes( + numeric_attributes={"age": 25.0}, categorical_attributes={"location": "US"} + ) + coefficients = { + "action1": BanditCoefficients( + action_key="action1", + intercept=0.5, + subject_numeric_coefficients=[ + BanditNumericAttributeCoefficient( + attribute_key="age", coefficient=0.1, missing_value_coefficient=0.0 + ) + ], + subject_categorical_coefficients=[ + BanditCategoricalAttributeCoefficient( + attribute_key="location", + missing_value_coefficient=0.0, + value_coefficients={"US": 0.2}, + ) + ], + action_numeric_coefficients=[], + action_categorical_coefficients=[], + ), + "action2": BanditCoefficients( + action_key="action2", + intercept=0.3, + subject_numeric_coefficients=[ + BanditNumericAttributeCoefficient( + attribute_key="age", coefficient=0.3, missing_value_coefficient=0.0 + ) + ], + subject_categorical_coefficients=[ + BanditCategoricalAttributeCoefficient( + attribute_key="location", + missing_value_coefficient=0.0, + value_coefficients={"US": -0.2}, + ) + ], + action_numeric_coefficients=[], + action_categorical_coefficients=[], + ), + } + bandit_model = BanditModelData( + gamma=0.1, + default_action_score=0.0, + action_probability_floor=0.1, + coefficients=coefficients, + ) + + evaluator = BanditEvaluator(sharder=DeterministicSharder({})) + evaluation = evaluator.evaluate_bandit( + flag_key, + subject_key, + subject_attributes, + {"action1": Attributes.empty(), "action2": Attributes.empty()}, + bandit_model, + ) + + assert evaluation.flag_key == flag_key + assert evaluation.subject_key == subject_key + assert evaluation.subject_attributes == subject_attributes + assert evaluation.action_key == "action1" + assert evaluation.gamma == bandit_model.gamma + assert evaluation.action_score == 3.2 + assert pytest.approx(evaluation.action_weight, rel=1e-2) == 0.41 From fcccc1cc387a51d723c9ca0462c973624c2490b0 Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 11:03:15 -0700 Subject: [PATCH 5/6] :broom: --- eppo_client/bandit.py | 2 +- eppo_client/client.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/eppo_client/bandit.py b/eppo_client/bandit.py index c41ead7..0529351 100644 --- a/eppo_client/bandit.py +++ b/eppo_client/bandit.py @@ -1,6 +1,6 @@ from dataclasses import dataclass import logging -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional from eppo_client.models import ( BanditCategoricalAttributeCoefficient, diff --git a/eppo_client/client.py b/eppo_client/client.py index 9d4eb9d..9cabaab 100644 --- a/eppo_client/client.py +++ b/eppo_client/client.py @@ -1,7 +1,7 @@ import datetime import logging import json -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from eppo_client.assignment_logger import AssignmentLogger from eppo_client.bandit import BanditEvaluator, BanditResult, Attributes, ActionContexts from eppo_client.configuration_requestor import ( @@ -245,7 +245,8 @@ def get_bandit_action( flag_key (str): The feature flag key that contains the bandit as one of the variations. subject_key (str): The key identifying the subject. subject_context (Attributes): The subject context - actions (Dict[str, Attributes]): The dictionary that maps action keys to their context of actions with their contexts. + actions (Dict[str, Attributes]): The dictionary that maps action keys + to their context of actions with their contexts. default (str): The default variation to use if the subject is not part of the bandit. Returns: From a5d0893c416dd3d564b71b9b8fdf6bcfa0aca67c Mon Sep 17 00:00:00 2001 From: Sven Schmit Date: Mon, 10 Jun 2024 11:07:03 -0700 Subject: [PATCH 6/6] add usage example to get_bandit_action --- eppo_client/client.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/eppo_client/client.py b/eppo_client/client.py index 9cabaab..789e185 100644 --- a/eppo_client/client.py +++ b/eppo_client/client.py @@ -254,7 +254,26 @@ def get_bandit_action( or the assignment if they are not. The BanditResult includes: - variation (str): The assignment key indicating the subject's variation. - action (str): The key of the selected action if the subject is part of the bandit. + + Example: + result = client.get_bandit_action( + "flag_key", + "subject_key", + Attributes( + numeric_attributes={"age": 25}, + categorical_attributes={"country": "USA"}), + { + "action1": Attributes(numeric_attributes={"price": 10.0}, categorical_attributes={"category": "A"}), + "action2": Attributes.empty() + }, + "default" + ) + if result.action is None: + do_variation(result.variation) + else: + do_action(result.action) """ + try: return self.get_bandit_action_detail( flag_key,