From f4be51fb59e7d71c5dd2e33de2136b27c14f8a04 Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Mon, 17 Sep 2018 14:52:00 -0700 Subject: [PATCH 01/14] Add AS algorithm. --- quantecon/game_theory/__init__.py | 1 + quantecon/game_theory/repeated_game.py | 193 ++++++++++++++++++ .../game_theory/tests/test_repeated_game.py | 46 +++++ 3 files changed, 240 insertions(+) create mode 100644 quantecon/game_theory/repeated_game.py create mode 100644 quantecon/game_theory/tests/test_repeated_game.py diff --git a/quantecon/game_theory/__init__.py b/quantecon/game_theory/__init__.py index 0a3b76e8c..27ca2c270 100644 --- a/quantecon/game_theory/__init__.py +++ b/quantecon/game_theory/__init__.py @@ -11,3 +11,4 @@ from .mclennan_tourky import mclennan_tourky from .vertex_enumeration import vertex_enumeration, vertex_enumeration_gen from .game_generators import * +from .repeated_game import AS diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py new file mode 100644 index 000000000..ef84287a1 --- /dev/null +++ b/quantecon/game_theory/repeated_game.py @@ -0,0 +1,193 @@ +""" +Algorithms for repeated game. + +""" + +import numpy as np +from scipy.spatial import ConvexHull +from numba import jit + +def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)): + """ + Using AS algorithm to compute the set of payoff pairs of all pure-strategy + subgame-perfect equilibria with public randomization for any repeated + two-player games with perfect monitoring and discounting, following + Abreu and Sannikov (2014). + + Parameters + ---------- + g : NormalFormGame + NormalFormGame instance with 2 players. + + delta : scalar(float) + The discounting factor. + + tol : scalar(float), optional(default=1e-12) + Tolerance for convergence checking. + + max_iter : scalar(int), optional(default=500) + Maximum number of iterations. + + u : ndarray(float, ndim=1) + The initial threat points. + + Returns + ------- + hull : scipy.spatial.ConvexHull + The convex hull of feasible payoff pairs. + """ + best_dev_gains = _best_dev_gains(g, delta) + C = np.empty((4, 2)) + IC = np.empty(2) + payoff = np.empty(2) + # array for checking if payoff is inside the polytope or not + payoff_pts = np.ones(3) + new_pts = np.empty((4, 2)) + # the length of v is limited by |A1|*|A2|*4 + v_new = np.empty((np.prod(g.nums_actions)*4, 2)) + v_old = np.empty((np.prod(g.nums_actions)*4, 2)) + n_new_pt = 0 + + # initialization + payoff_profile_pts = \ + g.payoff_profile_array.reshape(np.prod(g.nums_actions), 2) + v_new[:np.prod(g.nums_actions)] = payoff_profile_pts + n_new_pt = np.prod(g.nums_actions) + + n_iter = 0 + while True: + v_old[:n_new_pt] = v_new[:n_new_pt] + n_old_pt = n_new_pt + hull = ConvexHull(v_old[:n_old_pt]) + + v_new, n_new_pt = \ + update_v(delta, g.nums_actions, g.payoff_arrays, + best_dev_gains, hull.points, hull.vertices, + hull.equations, u, IC, payoff, payoff_pts, + new_pts, v_new) + n_iter += 1 + if n_iter >= max_iter: + break + + # check convergence + if n_new_pt == n_old_pt: + if np.linalg.norm(v_new[:n_new_pt] - v_old[:n_new_pt]) < tol: + break + + # update threat points + update_u(u, v_new[:n_new_pt]) + + hull = ConvexHull(v_new[:n_new_pt]) + + return hull + +@jit() +def _best_dev_gains(g, delta): + """ + Calculate the payoff gains from deviating from the current action to + the best response for each player. + """ + best_dev_gains0 = (1-delta)/delta * \ + (np.max(g.payoff_arrays[0], 0) - g.payoff_arrays[0]) + best_dev_gains1 = (1-delta)/delta * \ + (np.max(g.payoff_arrays[1], 0) - g.payoff_arrays[1]) + + return best_dev_gains0, best_dev_gains1 + +@jit(nopython=True) +def update_v(delta, nums_actions, payoff_arrays, best_dev_gains, points, + vertices, equations, u, IC, payoff, payoff_pts, new_pts, + v_new, tol=1e-10): + """ + Updating the payoff convex hull by iterating all action pairs. + """ + n_new_pt = 0 + for a0 in range(nums_actions[0]): + for a1 in range(nums_actions[1]): + payoff[0] = payoff_arrays[0][a0, a1] + payoff[1] = payoff_arrays[1][a1, a0] + IC[0] = u[0] + best_dev_gains[0][a0, a1] + IC[1] = u[1] + best_dev_gains[1][a1, a0] + + # check if payoff is larger than IC + if (payoff >= IC).all(): + # check if payoff is inside the convex hull + payoff_pts[:2] = payoff + if (np.dot(equations, payoff_pts) <= tol).all(): + v_new[n_new_pt] = payoff + n_new_pt += 1 + continue + + new_pts, n = find_C(new_pts, points, vertices, IC, tol) + + for i in range(n): + v_new[n_new_pt] = delta * new_pts[i] + (1-delta) * payoff + n_new_pt += 1 + + return v_new, n_new_pt + +@jit(nopython=True) +def find_C(C, points, vertices, IC, tol): + """ + Find all the intersection points between the current polytope + and the two IC constraints. + """ + n_IC = [0, 0] + weights = np.empty(2) + # vertices is ordered counterclockwise + for i in range(len(vertices)-1): + intersect(C, n_IC, weights, IC, + points[vertices[i]], points[vertices[i+1]], tol) + + intersect(C, n_IC, weights, IC, + points[vertices[-1]], points[vertices[0]], tol) + + # check the case that IC is a interior point of the polytope + n = n_IC[0] + n_IC[1] + if (n_IC[0] == 1 & n_IC[1] == 1): + C[2, 0] = IC[0] + C[2, 1] = IC[1] + n += 1 + + return C, n + +@jit(nopython=True) +def intersect(C, n_IC, weights, IC, pt0, pt1, tol): + """ + Find the intersection points of a simplex and ICs. + """ + for i in range(2): + if (abs(pt0[i] - pt1[i]) < tol): + None + else: + weights[i] = (pt0[i] - IC[i]) / (pt0[i] - pt1[i]) + + # intersection of IC[j] + if (0 < weights[i] <= 1): + x = (1 - weights[i]) * pt0[1-i] + weights[i] * pt1[1-i] + # x has to be strictly higher than IC[1-j] + # if it is equal, then it means IC is one of the vertex + # it will be added to C in below + if x - IC[1-i] > tol: + C[n_IC[0]+n_IC[1], i] = IC[i] + C[n_IC[0]+n_IC[1], 1-i] = x + n_IC[i] += 1 + elif x - IC[1-i] > -tol: + C[n_IC[0]+n_IC[1], i] = IC[i] + C[n_IC[0]+n_IC[1], 1-i] = x + n_IC[i] += 1 + # to avoid duplication when IC is a vertex + break + return C, n_IC + +@jit(nopython=True) +def update_u(u, v): + """ + Update the threat points. + """ + for i in range(2): + v_min = v[:, i].min() + if u[i] < v_min: + u[i] = v_min + + return u diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py new file mode 100644 index 000000000..07ae9e1f9 --- /dev/null +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -0,0 +1,46 @@ +""" +Tests for repeated_game.py + +""" +import numpy as np +from numpy.testing import assert_array_equal +from quantecon.game_theory import Player, NormalFormGame, AS + +class TestAS(): + def setUp(self): + self.game_dicts = [] + + # Example from Abreu and Sannikov (2014) + bimatrix = [[(16, 9), (3, 13), (0, 3)], + [(21, 1), (10, 4), (-1, 0)], + [(9, 0), (5, -4), (-5, -15)]] + vertice_inds = np.array([4, 8, 12, 11, 6, 1]) + d = {'g': NormalFormGame(bimatrix), + 'delta': 0.3, + 'vertices': vertice_inds, + 'u': np.array([0, 0])} + self.game_dicts.append(d) + + # Prisoner's dilemma + bimatrix = [[(9, 9), (1, 10)], + [(10, 1), (3, 3)]] + vertice_inds = np.array([6, 3, 0, 2]) + d = {'g': NormalFormGame(bimatrix), + 'delta': 0.9, + 'vertices': vertice_inds, + 'u': np.array([3, 3])} + self.game_dicts.append(d) + + def test_AS(self): + for d in self.game_dicts: + hull = AS(d['g'], d['delta'], u=d['u']) + assert_array_equal(d['vertices'], hull.vertices) + +if __name__ == '__main__': + import sys + import nose + + argv = sys.argv[:] + argv.append('--verbose') + argv.append('--nocapture') + nose.main(argv=argv, defaultTest=__file__) From a9d24ddcc73083946e700832d0ce5d42aa937450 Mon Sep 17 00:00:00 2001 From: Zejin Shi <> Date: Thu, 20 Sep 2018 22:01:57 -0700 Subject: [PATCH 02/14] Modify variable names. --- quantecon/game_theory/repeated_game.py | 71 +++++++++++++++----------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index ef84287a1..bd3ca02b0 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -39,53 +39,58 @@ def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)): best_dev_gains = _best_dev_gains(g, delta) C = np.empty((4, 2)) IC = np.empty(2) - payoff = np.empty(2) + action_profile_payoff = np.empty(2) # array for checking if payoff is inside the polytope or not - payoff_pts = np.ones(3) + # the last entry is set to be 1 + extended_payoff = np.ones(3) + # array to store new points of C in each intersection + # at most 4 new points will be generated new_pts = np.empty((4, 2)) + # array to store the points of W # the length of v is limited by |A1|*|A2|*4 - v_new = np.empty((np.prod(g.nums_actions)*4, 2)) - v_old = np.empty((np.prod(g.nums_actions)*4, 2)) + W_new = np.empty((np.prod(g.nums_actions)*4, 2)) + W_old = np.empty((np.prod(g.nums_actions)*4, 2)) + # count the new points generated in each iteration n_new_pt = 0 # initialization - payoff_profile_pts = \ + payoff_pts = \ g.payoff_profile_array.reshape(np.prod(g.nums_actions), 2) - v_new[:np.prod(g.nums_actions)] = payoff_profile_pts + W_new[:np.prod(g.nums_actions)] = payoff_pts n_new_pt = np.prod(g.nums_actions) n_iter = 0 while True: - v_old[:n_new_pt] = v_new[:n_new_pt] + W_old[:n_new_pt] = W_new[:n_new_pt] n_old_pt = n_new_pt - hull = ConvexHull(v_old[:n_old_pt]) + hull = ConvexHull(W_old[:n_old_pt]) - v_new, n_new_pt = \ - update_v(delta, g.nums_actions, g.payoff_arrays, - best_dev_gains, hull.points, hull.vertices, - hull.equations, u, IC, payoff, payoff_pts, - new_pts, v_new) + W_new, n_new_pt = \ + R(delta, g.nums_actions, g.payoff_arrays, + best_dev_gains, hull.points, hull.vertices, + hull.equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new) n_iter += 1 if n_iter >= max_iter: break # check convergence if n_new_pt == n_old_pt: - if np.linalg.norm(v_new[:n_new_pt] - v_old[:n_new_pt]) < tol: + if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol: break # update threat points - update_u(u, v_new[:n_new_pt]) + update_u(u, W_new[:n_new_pt]) - hull = ConvexHull(v_new[:n_new_pt]) + hull = ConvexHull(W_new[:n_new_pt]) return hull @jit() def _best_dev_gains(g, delta): """ - Calculate the payoff gains from deviating from the current action to - the best response for each player. + Calculate the normalized payoff gains from deviating from the current + action to the best response for each player. """ best_dev_gains0 = (1-delta)/delta * \ (np.max(g.payoff_arrays[0], 0) - g.payoff_arrays[0]) @@ -95,43 +100,47 @@ def _best_dev_gains(g, delta): return best_dev_gains0, best_dev_gains1 @jit(nopython=True) -def update_v(delta, nums_actions, payoff_arrays, best_dev_gains, points, - vertices, equations, u, IC, payoff, payoff_pts, new_pts, - v_new, tol=1e-10): +def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, + vertices, equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new, tol=1e-10): """ Updating the payoff convex hull by iterating all action pairs. + Using the R operator proposed by Abreu and Sannikov 2014. """ n_new_pt = 0 for a0 in range(nums_actions[0]): for a1 in range(nums_actions[1]): - payoff[0] = payoff_arrays[0][a0, a1] - payoff[1] = payoff_arrays[1][a1, a0] + action_profile_payoff[0] = payoff_arrays[0][a0, a1] + action_profile_payoff[1] = payoff_arrays[1][a1, a0] IC[0] = u[0] + best_dev_gains[0][a0, a1] IC[1] = u[1] + best_dev_gains[1][a1, a0] # check if payoff is larger than IC - if (payoff >= IC).all(): + if (action_profile_payoff >= IC).all(): # check if payoff is inside the convex hull - payoff_pts[:2] = payoff - if (np.dot(equations, payoff_pts) <= tol).all(): - v_new[n_new_pt] = payoff + extended_payoff[:2] = action_profile_payoff + if (np.dot(equations, extended_payoff) <= tol).all(): + W_new[n_new_pt] = action_profile_payoff n_new_pt += 1 continue new_pts, n = find_C(new_pts, points, vertices, IC, tol) for i in range(n): - v_new[n_new_pt] = delta * new_pts[i] + (1-delta) * payoff + W_new[n_new_pt] = \ + delta * new_pts[i] + (1-delta) * action_profile_payoff n_new_pt += 1 - return v_new, n_new_pt + return W_new, n_new_pt @jit(nopython=True) def find_C(C, points, vertices, IC, tol): """ Find all the intersection points between the current polytope - and the two IC constraints. + and the two IC constraints. It is done by iterating simplex + counterclockwise. """ + # record the number of intersections for each IC. n_IC = [0, 0] weights = np.empty(2) # vertices is ordered counterclockwise @@ -154,7 +163,7 @@ def find_C(C, points, vertices, IC, tol): @jit(nopython=True) def intersect(C, n_IC, weights, IC, pt0, pt1, tol): """ - Find the intersection points of a simplex and ICs. + Find the intersection points of a simplex and IC constraints. """ for i in range(2): if (abs(pt0[i] - pt1[i]) < tol): From b816aee2e0bfdd07b9ef5218f47e8d78b1ca6bab Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Wed, 26 Sep 2018 13:56:44 -0700 Subject: [PATCH 03/14] Modify `intersection` and `find_C`. --- quantecon/game_theory/repeated_game.py | 87 ++++++++++--------- .../game_theory/tests/test_repeated_game.py | 2 +- 2 files changed, 48 insertions(+), 41 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index bd3ca02b0..228ff0816 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -5,14 +5,14 @@ import numpy as np from scipy.spatial import ConvexHull -from numba import jit +from numba import jit, njit def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)): """ - Using AS algorithm to compute the set of payoff pairs of all pure-strategy - subgame-perfect equilibria with public randomization for any repeated - two-player games with perfect monitoring and discounting, following - Abreu and Sannikov (2014). + Using AS algorithm to compute the set of payoff pairs of all + pure-strategy subgame-perfect equilibria with public randomization + for any repeated two-player games with perfect monitoring and + discounting, following Abreu and Sannikov (2014). Parameters ---------- @@ -70,6 +70,7 @@ def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)): best_dev_gains, hull.points, hull.vertices, hull.equations, u, IC, action_profile_payoff, extended_payoff, new_pts, W_new) + n_iter += 1 if n_iter >= max_iter: break @@ -99,7 +100,7 @@ def _best_dev_gains(g, delta): return best_dev_gains0, best_dev_gains1 -@jit(nopython=True) +@njit def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, vertices, equations, u, IC, action_profile_payoff, extended_payoff, new_pts, W_new, tol=1e-10): @@ -124,7 +125,8 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, n_new_pt += 1 continue - new_pts, n = find_C(new_pts, points, vertices, IC, tol) + new_pts, n = find_C(new_pts, points, vertices, equations, + extended_payoff, IC, tol) for i in range(n): W_new[n_new_pt] = \ @@ -133,63 +135,68 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, return W_new, n_new_pt -@jit(nopython=True) -def find_C(C, points, vertices, IC, tol): +@njit +def find_C(C, points, vertices, equations, extended_payoff, IC, tol): """ Find all the intersection points between the current polytope and the two IC constraints. It is done by iterating simplex counterclockwise. """ # record the number of intersections for each IC. - n_IC = [0, 0] + n = 0 weights = np.empty(2) # vertices is ordered counterclockwise for i in range(len(vertices)-1): - intersect(C, n_IC, weights, IC, - points[vertices[i]], points[vertices[i+1]], tol) + n = intersect(C, n, weights, IC, + points[vertices[i]], + points[vertices[i+1]], tol) - intersect(C, n_IC, weights, IC, - points[vertices[-1]], points[vertices[0]], tol) + n = intersect(C, n, weights, IC, + points[vertices[-1]], + points[vertices[0]], tol) # check the case that IC is a interior point of the polytope - n = n_IC[0] + n_IC[1] - if (n_IC[0] == 1 & n_IC[1] == 1): - C[2, 0] = IC[0] - C[2, 1] = IC[1] + extended_payoff[:2] = IC + if (np.dot(equations, extended_payoff) <= tol).all(): + C[n, :] = IC n += 1 return C, n -@jit(nopython=True) -def intersect(C, n_IC, weights, IC, pt0, pt1, tol): +@njit +def intersect(C, n, weights, IC, pt0, pt1, tol): """ - Find the intersection points of a simplex and IC constraints. + Find the intersection points of a half-closed simplex + (pt0, pt1] and IC constraints. """ for i in range(2): if (abs(pt0[i] - pt1[i]) < tol): - None + if (abs(pt1[i] - IC[i]) < tol): + x = pt1[1-i] + else: + continue else: weights[i] = (pt0[i] - IC[i]) / (pt0[i] - pt1[i]) - - # intersection of IC[j] + # pt0 is not included to avoid duplication + # weights in (0, 1] if (0 < weights[i] <= 1): x = (1 - weights[i]) * pt0[1-i] + weights[i] * pt1[1-i] - # x has to be strictly higher than IC[1-j] - # if it is equal, then it means IC is one of the vertex - # it will be added to C in below - if x - IC[1-i] > tol: - C[n_IC[0]+n_IC[1], i] = IC[i] - C[n_IC[0]+n_IC[1], 1-i] = x - n_IC[i] += 1 - elif x - IC[1-i] > -tol: - C[n_IC[0]+n_IC[1], i] = IC[i] - C[n_IC[0]+n_IC[1], 1-i] = x - n_IC[i] += 1 - # to avoid duplication when IC is a vertex - break - return C, n_IC - -@jit(nopython=True) + else: + continue + # x has to be strictly higher than IC[1-j] + # if it is equal, then it means IC is one of the vertex + # it will be added to C in below + if x - IC[1-i] > tol: + C[n, i] = IC[i] + C[n, 1-i] = x + n += 1 + elif x - IC[1-i] > -tol: + # to avoid duplication when IC is a vertex + break + + return n + +@njit def update_u(u, v): """ Update the threat points. diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py index 07ae9e1f9..816ef053d 100644 --- a/quantecon/game_theory/tests/test_repeated_game.py +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -24,7 +24,7 @@ def setUp(self): # Prisoner's dilemma bimatrix = [[(9, 9), (1, 10)], [(10, 1), (3, 3)]] - vertice_inds = np.array([6, 3, 0, 2]) + vertice_inds = np.array([6, 3, 0, 1]) d = {'g': NormalFormGame(bimatrix), 'delta': 0.9, 'vertices': vertice_inds, From 73a6faa95e294217d87347de3475a332f201f7fa Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Wed, 26 Sep 2018 14:18:46 -0700 Subject: [PATCH 04/14] Incorporate with `RepeatedGame` class. --- quantecon/game_theory/__init__.py | 2 +- quantecon/game_theory/repeated_game.py | 162 ++++++++++-------- .../game_theory/tests/test_repeated_game.py | 9 +- 3 files changed, 97 insertions(+), 76 deletions(-) diff --git a/quantecon/game_theory/__init__.py b/quantecon/game_theory/__init__.py index 27ca2c270..1b13ed846 100644 --- a/quantecon/game_theory/__init__.py +++ b/quantecon/game_theory/__init__.py @@ -11,4 +11,4 @@ from .mclennan_tourky import mclennan_tourky from .vertex_enumeration import vertex_enumeration, vertex_enumeration_gen from .game_generators import * -from .repeated_game import AS +from .repeated_game import RepeatedGame diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 228ff0816..b8ce85893 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -1,5 +1,5 @@ """ -Algorithms for repeated game. +Tools for repeated game. """ @@ -7,96 +7,116 @@ from scipy.spatial import ConvexHull from numba import jit, njit -def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)): +class RepeatedGame: """ - Using AS algorithm to compute the set of payoff pairs of all - pure-strategy subgame-perfect equilibria with public randomization - for any repeated two-player games with perfect monitoring and - discounting, following Abreu and Sannikov (2014). + Class representing an N-player repeated game. Parameters ---------- - g : NormalFormGame - NormalFormGame instance with 2 players. + stage_game : NormalFormGame + The stage game used to create the repeated game. delta : scalar(float) - The discounting factor. + The common discount rate at which all players discount the future. - tol : scalar(float), optional(default=1e-12) - Tolerance for convergence checking. - - max_iter : scalar(int), optional(default=500) - Maximum number of iterations. - - u : ndarray(float, ndim=1) - The initial threat points. - - Returns - ------- - hull : scipy.spatial.ConvexHull - The convex hull of feasible payoff pairs. """ - best_dev_gains = _best_dev_gains(g, delta) - C = np.empty((4, 2)) - IC = np.empty(2) - action_profile_payoff = np.empty(2) - # array for checking if payoff is inside the polytope or not - # the last entry is set to be 1 - extended_payoff = np.ones(3) - # array to store new points of C in each intersection - # at most 4 new points will be generated - new_pts = np.empty((4, 2)) - # array to store the points of W - # the length of v is limited by |A1|*|A2|*4 - W_new = np.empty((np.prod(g.nums_actions)*4, 2)) - W_old = np.empty((np.prod(g.nums_actions)*4, 2)) - # count the new points generated in each iteration - n_new_pt = 0 - - # initialization - payoff_pts = \ - g.payoff_profile_array.reshape(np.prod(g.nums_actions), 2) - W_new[:np.prod(g.nums_actions)] = payoff_pts - n_new_pt = np.prod(g.nums_actions) - - n_iter = 0 - while True: - W_old[:n_new_pt] = W_new[:n_new_pt] - n_old_pt = n_new_pt - hull = ConvexHull(W_old[:n_old_pt]) - - W_new, n_new_pt = \ - R(delta, g.nums_actions, g.payoff_arrays, - best_dev_gains, hull.points, hull.vertices, - hull.equations, u, IC, action_profile_payoff, - extended_payoff, new_pts, W_new) - - n_iter += 1 - if n_iter >= max_iter: - break - - # check convergence - if n_new_pt == n_old_pt: - if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol: + def __init__(self, stage_game, delta): + self.sg = stage_game + self.delta = delta + self.N = stage_game.N + self.nums_actions = stage_game.nums_actions + + def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): + """ + Using AS algorithm to compute the set of payoff pairs of all + pure-strategy subgame-perfect equilibria with public randomization + for any repeated two-player games with perfect monitoring and + discounting, following Abreu and Sannikov (2014). + + Parameters + ---------- + g : NormalFormGame + NormalFormGame instance with 2 players. + + delta : scalar(float) + The discounting factor. + + tol : scalar(float), optional(default=1e-12) + Tolerance for convergence checkinsg. + + max_iter : scalar(int), optional(default=500) + Maximum number of iterations. + + u : ndarray(float, ndim=1) + The initial threat points. + + Returns + ------- + hull : scipy.spatial.ConvexHull + The convex hull of feasible payoff pairs. + """ + sg, delta = self.sg, self.delta + best_dev_gains = _best_dev_gains(sg, delta) + C = np.empty((4, 2)) + IC = np.empty(2) + action_profile_payoff = np.empty(2) + # array for checking if payoff is inside the polytope or not + # the last entry is set to be 1 + extended_payoff = np.ones(3) + # array to store new points of C in each intersection + # at most 4 new points will be generated + new_pts = np.empty((4, 2)) + # array to store the points of W + # the length of v is limited by |A1|*|A2|*4 + W_new = np.empty((np.prod(sg.nums_actions)*4, 2)) + W_old = np.empty((np.prod(sg.nums_actions)*4, 2)) + # count the new points generated in each iteration + n_new_pt = 0 + + # initialization + payoff_pts = \ + sg.payoff_profile_array.reshape(np.prod(sg.nums_actions), 2) + W_new[:np.prod(sg.nums_actions)] = payoff_pts + n_new_pt = np.prod(sg.nums_actions) + + n_iter = 0 + while True: + W_old[:n_new_pt] = W_new[:n_new_pt] + n_old_pt = n_new_pt + hull = ConvexHull(W_old[:n_old_pt]) + + W_new, n_new_pt = \ + R(delta, sg.nums_actions, sg.payoff_arrays, + best_dev_gains, hull.points, hull.vertices, + hull.equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new) + + n_iter += 1 + if n_iter >= max_iter: break - # update threat points - update_u(u, W_new[:n_new_pt]) + # check convergence + if n_new_pt == n_old_pt: + if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol: + break + + # update threat points + update_u(u, W_new[:n_new_pt]) - hull = ConvexHull(W_new[:n_new_pt]) + hull = ConvexHull(W_new[:n_new_pt]) - return hull + return hull @jit() -def _best_dev_gains(g, delta): +def _best_dev_gains(sg, delta): """ Calculate the normalized payoff gains from deviating from the current action to the best response for each player. """ best_dev_gains0 = (1-delta)/delta * \ - (np.max(g.payoff_arrays[0], 0) - g.payoff_arrays[0]) + (np.max(sg.payoff_arrays[0], 0) - sg.payoff_arrays[0]) best_dev_gains1 = (1-delta)/delta * \ - (np.max(g.payoff_arrays[1], 0) - g.payoff_arrays[1]) + (np.max(sg.payoff_arrays[1], 0) - sg.payoff_arrays[1]) return best_dev_gains0, best_dev_gains1 diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py index 816ef053d..7f6b3c5c1 100644 --- a/quantecon/game_theory/tests/test_repeated_game.py +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -4,7 +4,7 @@ """ import numpy as np from numpy.testing import assert_array_equal -from quantecon.game_theory import Player, NormalFormGame, AS +from quantecon.game_theory import Player, NormalFormGame, RepeatedGame class TestAS(): def setUp(self): @@ -15,7 +15,7 @@ def setUp(self): [(21, 1), (10, 4), (-1, 0)], [(9, 0), (5, -4), (-5, -15)]] vertice_inds = np.array([4, 8, 12, 11, 6, 1]) - d = {'g': NormalFormGame(bimatrix), + d = {'sg': NormalFormGame(bimatrix), 'delta': 0.3, 'vertices': vertice_inds, 'u': np.array([0, 0])} @@ -25,7 +25,7 @@ def setUp(self): bimatrix = [[(9, 9), (1, 10)], [(10, 1), (3, 3)]] vertice_inds = np.array([6, 3, 0, 1]) - d = {'g': NormalFormGame(bimatrix), + d = {'sg': NormalFormGame(bimatrix), 'delta': 0.9, 'vertices': vertice_inds, 'u': np.array([3, 3])} @@ -33,7 +33,8 @@ def setUp(self): def test_AS(self): for d in self.game_dicts: - hull = AS(d['g'], d['delta'], u=d['u']) + rpg = RepeatedGame(d['sg'], d['delta']) + hull = rpg.AS(u=d['u']) assert_array_equal(d['vertices'], hull.vertices) if __name__ == '__main__': From c27317639ab33dc6f7bf47f294329a149b03deca Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Wed, 26 Sep 2018 14:56:21 -0700 Subject: [PATCH 05/14] Compare vertex values in the test. --- .../game_theory/tests/test_repeated_game.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py index 7f6b3c5c1..25788f5f2 100644 --- a/quantecon/game_theory/tests/test_repeated_game.py +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -3,7 +3,7 @@ """ import numpy as np -from numpy.testing import assert_array_equal +from numpy.testing import assert_allclose from quantecon.game_theory import Player, NormalFormGame, RepeatedGame class TestAS(): @@ -14,28 +14,36 @@ def setUp(self): bimatrix = [[(16, 9), (3, 13), (0, 3)], [(21, 1), (10, 4), (-1, 0)], [(9, 0), (5, -4), (-5, -15)]] - vertice_inds = np.array([4, 8, 12, 11, 6, 1]) + vertices = np.array([[7.33770472e+00, 1.09826253e+01], + [1.12568240e+00, 2.80000000e+00], + [7.33770472e+00, 4.44089210e-16], + [7.86308964e+00, 4.44089210e-16], + [1.97917977e+01, 2.80000000e+00], + [1.55630896e+01, 9.10000000e+00]]) d = {'sg': NormalFormGame(bimatrix), 'delta': 0.3, - 'vertices': vertice_inds, - 'u': np.array([0, 0])} + 'vertices': vertices, + 'u': np.zeros(2)} self.game_dicts.append(d) # Prisoner's dilemma bimatrix = [[(9, 9), (1, 10)], [(10, 1), (3, 3)]] - vertice_inds = np.array([6, 3, 0, 1]) + vertices = np.array([[3. , 3. ], + [9.75, 3. ], + [9. , 9. ], + [3. , 9.75]]) d = {'sg': NormalFormGame(bimatrix), 'delta': 0.9, - 'vertices': vertice_inds, - 'u': np.array([3, 3])} + 'vertices': vertices, + 'u': np.array([3., 3.])} self.game_dicts.append(d) def test_AS(self): for d in self.game_dicts: rpg = RepeatedGame(d['sg'], d['delta']) hull = rpg.AS(u=d['u']) - assert_array_equal(d['vertices'], hull.vertices) + assert_allclose(hull.points[hull.vertices], d['vertices']) if __name__ == '__main__': import sys From 4687e9a81845f95da761ee1bb5d0e947134e28ce Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Wed, 3 Oct 2018 20:39:12 -0700 Subject: [PATCH 06/14] Add docstrings. --- quantecon/game_theory/repeated_game.py | 186 +++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 11 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index b8ce85893..9d45f342c 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -35,11 +35,8 @@ def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): Parameters ---------- - g : NormalFormGame - NormalFormGame instance with 2 players. - - delta : scalar(float) - The discounting factor. + rpd : RepeatedGame + Two player repeated game. tol : scalar(float), optional(default=1e-12) Tolerance for convergence checkinsg. @@ -47,7 +44,7 @@ def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): max_iter : scalar(int), optional(default=500) Maximum number of iterations. - u : ndarray(float, ndim=1) + u : ndarray(float, ndim=1), optional(default=np.zeros(2)) The initial threat points. Returns @@ -112,6 +109,28 @@ def _best_dev_gains(sg, delta): """ Calculate the normalized payoff gains from deviating from the current action to the best response for each player. + + Parameters + ---------- + sg : NormalFormGame + The stage game. + + delta : scalar(float) + The common discount rate at which all players discount the future. + + Returns + ------- + best_dev_gains0 : ndarray(float, ndim=2) + The normalized best deviation payoff gain arrays. + best_dev_gains[0][a0, a1] is normalized payoff gain + player 0 can get if originally players are choosing + a0 and a1, and player 0 deviates to the best response action. + + best_dev_gains1 : ndarray(float, ndim=2) + The normalized best deviation payoff gain arrays. + best_dev_gains[1][a0, a1] is normalized payoff gain + player 1 can get if originally players are choosing + a0 and a1, and player 1 deviates to the best response action. """ best_dev_gains0 = (1-delta)/delta * \ (np.max(sg.payoff_arrays[0], 0) - sg.payoff_arrays[0]) @@ -127,6 +146,70 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, """ Updating the payoff convex hull by iterating all action pairs. Using the R operator proposed by Abreu and Sannikov 2014. + + Parameters + ---------- + delta : scalar(float) + The common discount rate at which all players discount + the future. + + nums_actions : tuple(int) + Tuple of the numbers of actions, one for each player. + + payoff_arrays : tuple(ndarray(float, ndim=2)) + Tuple of the payoff arrays, one for each player. + + best_dev_gains : tuple(ndarray(float, ndim=2)) + Tuple of the normalized best deviation payoff gain arrays. + best_dev_gains[i][a0, a1] is payoff gain player i + can get if originally players are choosing a0 and a1, + and player i deviates to the best response action. + + points : ndarray(float, ndim=2) + Coordinates of the points in the W, which construct a + feasible payoff convex hull. + + vertices : ndarray(float, ndim=1) + Indices of points forming the vertices of the convex hull, + which are in counterclockwise order. + + equations : ndarray(float, ndim=2) + [normal, offset] forming the hyperplane equation of the facet + + u : ndarray(float, ndim=1) + The threat points. + + IC : ndarray(float, ndim=1) + The minimum IC continuation values. + + action_profile_payoff : ndarray(float, ndim=1) + Array of payoff for one action profile. + + extended_payoff : ndarray(float, ndim=2) + The array [payoff0, payoff1, 1] for checking if + [payoff0, payoff1] is in the feasible payoff convex hull. + + new_pts : ndarray(float, ndim=1) + The 4 by 2 array for storing the generated potential + extreme points of one action profile. One action profile + can only generate at most 4 points. + + W_new : ndarray(float, ndim=2) + Array for storing the coordinates of the generated potential + extreme points that construct a new feasible payoff convex hull. + + tol: scalar(float), optional(default=1e-10) + The tolerance for checking if two values are equal. + + Returns + ------- + W_new : ndarray(float, ndim=2) + The coordinates of the generated potential extreme points + that construct a new feasible payoff convex hull. + + n_new_pt : scalar(int) + The number of points in W_new that construct the feasible + payoff convex hull. """ n_new_pt = 0 for a0 in range(nums_actions[0]): @@ -161,6 +244,42 @@ def find_C(C, points, vertices, equations, extended_payoff, IC, tol): Find all the intersection points between the current polytope and the two IC constraints. It is done by iterating simplex counterclockwise. + + Parameters + ---------- + C : ndarray(float, ndim=2) + The 4 by 2 array for storing the generated potential + extreme points of one action profile. One action profile + can only generate at most 4 points. + + points : ndarray(float, ndim=2) + Coordinates of the points in the W, which construct a + feasible payoff convex hull. + + vertices : ndarray(float, ndim=1) + Indices of points forming the vertices of the convex hull, + which are in counterclockwise order. + + equations : ndarray(float, ndim=2) + [normal, offset] forming the hyperplane equation of the facet + + extended_payoff : ndarray(float, ndim=1) + The array [payoff0, payoff1, 1] for checking if + [payoff0, payoff1] is in the feasible payoff convex hull. + + IC : ndarray(float, ndim=1) + The minimum IC continuation values. + + tol : scalar(float) + The tolerance for checking if two values are equal. + + Returns + ------- + C : ndarray(float, ndim=2) + The generated potential extreme points. + + n : scalar(int) + The number of found intersection points. """ # record the number of intersections for each IC. n = 0 @@ -188,6 +307,37 @@ def intersect(C, n, weights, IC, pt0, pt1, tol): """ Find the intersection points of a half-closed simplex (pt0, pt1] and IC constraints. + + Parameters + ---------- + C : ndarray(float, ndim=2) + The 4 by 2 array for storing the generated points of + one action profile. One action profile can only + generate at most 4 points. + + n : scalar(int) + The number of intersection points that have been found. + + weights : ndarray(float, ndim=1) + The size 2 array for storing the weights when calculate + the intersection point of simplex and IC constraints. + + IC : ndarray(float, ndim=1) + The minimum IC continuation values. + + pt0 : ndarray(float, ndim=1) + Coordinates of the starting point of the simplex. + + pt1 : ndarray(float, ndim=1) + Coordinates of the ending point of the simplex. + + tol : scalar(float) + The tolerance for checking if two values are equal. + + Returns + ------- + n : scalar(int) + The updated number of found intersection points. """ for i in range(2): if (abs(pt0[i] - pt1[i]) < tol): @@ -217,13 +367,27 @@ def intersect(C, n, weights, IC, pt0, pt1, tol): return n @njit -def update_u(u, v): +def update_u(u, W): """ - Update the threat points. + Update the threat points if it not feasible in the new W, + by the minimum of new feasible payoffs. + + Parameters + ---------- + u : ndarray(float, ndim=1) + The threat points. + + W : ndarray(float, ndim=1) + The points that construct the feasible payoff convex hull. + + Returns + ------- + u : ndarray(float, ndim=1) + The updated threat points. """ for i in range(2): - v_min = v[:, i].min() - if u[i] < v_min: - u[i] = v_min + W_min = W[:, i].min() + if u[i] < W_min: + u[i] = W_min return u From 4e7efa59d4992ff9beaf5bf0cb89ccf412e2bb16 Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Mon, 5 Nov 2018 15:46:16 -0700 Subject: [PATCH 07/14] FIX: solve the bug of cached keyword `u`. --- quantecon/game_theory/repeated_game.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 9d45f342c..8037e7ef1 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -26,7 +26,7 @@ def __init__(self, stage_game, delta): self.N = stage_game.N self.nums_actions = stage_game.nums_actions - def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): + def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): """ Using AS algorithm to compute the set of payoff pairs of all pure-strategy subgame-perfect equilibria with public randomization @@ -44,7 +44,7 @@ def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): max_iter : scalar(int), optional(default=500) Maximum number of iterations. - u : ndarray(float, ndim=1), optional(default=np.zeros(2)) + u_init : ndarray(float, ndim=1), optional(default=np.zeros(2)) The initial threat points. Returns @@ -70,6 +70,9 @@ def AS(self, tol=1e-12, max_iter=500, u=np.zeros(2)): # count the new points generated in each iteration n_new_pt = 0 + # copy the threat points + u = np.copy(u_init) + # initialization payoff_pts = \ sg.payoff_profile_array.reshape(np.prod(sg.nums_actions), 2) From c850c52b677430ac6f72b83412ac701abb32207d Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Fri, 9 Nov 2018 16:12:56 -0700 Subject: [PATCH 08/14] Modify docstrings. --- quantecon/game_theory/repeated_game.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 8037e7ef1..dc2e48e2d 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -39,13 +39,13 @@ def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): Two player repeated game. tol : scalar(float), optional(default=1e-12) - Tolerance for convergence checkinsg. + Tolerance for convergence checking. max_iter : scalar(int), optional(default=500) Maximum number of iterations. u_init : ndarray(float, ndim=1), optional(default=np.zeros(2)) - The initial threat points. + The initial guess of threat points. Returns ------- @@ -57,8 +57,8 @@ def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): C = np.empty((4, 2)) IC = np.empty(2) action_profile_payoff = np.empty(2) - # array for checking if payoff is inside the polytope or not - # the last entry is set to be 1 + # auxiliary array for checking if payoff is inside the convex hull + # first two entries for payoff point, and the last entry is 1. extended_payoff = np.ones(3) # array to store new points of C in each intersection # at most 4 new points will be generated @@ -131,9 +131,9 @@ def _best_dev_gains(sg, delta): best_dev_gains1 : ndarray(float, ndim=2) The normalized best deviation payoff gain arrays. - best_dev_gains[1][a0, a1] is normalized payoff gain + best_dev_gains[1][a1, a0] is normalized payoff gain player 1 can get if originally players are choosing - a0 and a1, and player 1 deviates to the best response action. + a1 and a0, and player 1 deviates to the best response action. """ best_dev_gains0 = (1-delta)/delta * \ (np.max(sg.payoff_arrays[0], 0) - sg.payoff_arrays[0]) @@ -164,8 +164,8 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, best_dev_gains : tuple(ndarray(float, ndim=2)) Tuple of the normalized best deviation payoff gain arrays. - best_dev_gains[i][a0, a1] is payoff gain player i - can get if originally players are choosing a0 and a1, + best_dev_gains[i][ai, a-i] is payoff gain player i + can get if originally players are choosing ai and a-i, and player i deviates to the best response action. points : ndarray(float, ndim=2) @@ -244,7 +244,7 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, @njit def find_C(C, points, vertices, equations, extended_payoff, IC, tol): """ - Find all the intersection points between the current polytope + Find all the intersection points between the current convex hull and the two IC constraints. It is done by iterating simplex counterclockwise. @@ -284,7 +284,6 @@ def find_C(C, points, vertices, equations, extended_payoff, IC, tol): n : scalar(int) The number of found intersection points. """ - # record the number of intersections for each IC. n = 0 weights = np.empty(2) # vertices is ordered counterclockwise @@ -297,7 +296,7 @@ def find_C(C, points, vertices, equations, extended_payoff, IC, tol): points[vertices[-1]], points[vertices[0]], tol) - # check the case that IC is a interior point of the polytope + # check the case that IC is an interior point of the convex hull extended_payoff[:2] = IC if (np.dot(equations, extended_payoff) <= tol).all(): C[n, :] = IC From 30e8852a13a6f7a21944353d04e3886b7005c45f Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Fri, 9 Nov 2018 16:16:18 -0700 Subject: [PATCH 09/14] FIX: change keyword 'u' to 'u_init' in test. --- quantecon/game_theory/tests/test_repeated_game.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py index 25788f5f2..d0572a7fc 100644 --- a/quantecon/game_theory/tests/test_repeated_game.py +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -42,7 +42,7 @@ def setUp(self): def test_AS(self): for d in self.game_dicts: rpg = RepeatedGame(d['sg'], d['delta']) - hull = rpg.AS(u=d['u']) + hull = rpg.AS(u_init=d['u']) assert_allclose(hull.points[hull.vertices], d['vertices']) if __name__ == '__main__': From 1c7c30054a48b27af3e12f9c225d3f9ba8154946 Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Mon, 12 Nov 2018 17:56:39 -0700 Subject: [PATCH 10/14] FIX: minor modifications. --- quantecon/game_theory/repeated_game.py | 58 ++++++++++++++++---------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index dc2e48e2d..4fdba4253 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -7,6 +7,7 @@ from scipy.spatial import ConvexHull from numba import jit, njit + class RepeatedGame: """ Class representing an N-player repeated game. @@ -50,9 +51,20 @@ def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): Returns ------- hull : scipy.spatial.ConvexHull - The convex hull of feasible payoff pairs. + The convex hull of equilibrium payoff pairs. + + References + ---------- + .. [1] Abreu, Dilip, and Yuliy Sannikov. "An algorithm for + two‐player repeated games with perfect monitoring." Theoretical + Economics 9.2 (2014): 313-338. """ sg, delta = self.sg, self.delta + + if sg.N != 2: + msg = "this algorithm only applies to repeated two-player games." + raise NotImplementedError(msg) + best_dev_gains = _best_dev_gains(sg, delta) C = np.empty((4, 2)) IC = np.empty(2) @@ -86,10 +98,10 @@ def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): hull = ConvexHull(W_old[:n_old_pt]) W_new, n_new_pt = \ - R(delta, sg.nums_actions, sg.payoff_arrays, - best_dev_gains, hull.points, hull.vertices, - hull.equations, u, IC, action_profile_payoff, - extended_payoff, new_pts, W_new) + _R(delta, sg.nums_actions, sg.payoff_arrays, + best_dev_gains, hull.points, hull.vertices, + hull.equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new) n_iter += 1 if n_iter >= max_iter: @@ -101,13 +113,13 @@ def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): break # update threat points - update_u(u, W_new[:n_new_pt]) + _update_u(u, W_new[:n_new_pt]) hull = ConvexHull(W_new[:n_new_pt]) return hull -@jit() + def _best_dev_gains(sg, delta): """ Calculate the normalized payoff gains from deviating from the current @@ -142,10 +154,11 @@ def _best_dev_gains(sg, delta): return best_dev_gains0, best_dev_gains1 + @njit -def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, - vertices, equations, u, IC, action_profile_payoff, - extended_payoff, new_pts, W_new, tol=1e-10): +def _R(delta, nums_actions, payoff_arrays, best_dev_gains, points, + vertices, equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new, tol=1e-10): """ Updating the payoff convex hull by iterating all action pairs. Using the R operator proposed by Abreu and Sannikov 2014. @@ -231,8 +244,8 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, n_new_pt += 1 continue - new_pts, n = find_C(new_pts, points, vertices, equations, - extended_payoff, IC, tol) + new_pts, n = _find_C(new_pts, points, vertices, equations, + extended_payoff, IC, tol) for i in range(n): W_new[n_new_pt] = \ @@ -241,8 +254,9 @@ def R(delta, nums_actions, payoff_arrays, best_dev_gains, points, return W_new, n_new_pt + @njit -def find_C(C, points, vertices, equations, extended_payoff, IC, tol): +def _find_C(C, points, vertices, equations, extended_payoff, IC, tol): """ Find all the intersection points between the current convex hull and the two IC constraints. It is done by iterating simplex @@ -288,13 +302,13 @@ def find_C(C, points, vertices, equations, extended_payoff, IC, tol): weights = np.empty(2) # vertices is ordered counterclockwise for i in range(len(vertices)-1): - n = intersect(C, n, weights, IC, - points[vertices[i]], - points[vertices[i+1]], tol) + n = _intersect(C, n, weights, IC, + points[vertices[i]], + points[vertices[i+1]], tol) - n = intersect(C, n, weights, IC, - points[vertices[-1]], - points[vertices[0]], tol) + n = _intersect(C, n, weights, IC, + points[vertices[-1]], + points[vertices[0]], tol) # check the case that IC is an interior point of the convex hull extended_payoff[:2] = IC @@ -304,8 +318,9 @@ def find_C(C, points, vertices, equations, extended_payoff, IC, tol): return C, n + @njit -def intersect(C, n, weights, IC, pt0, pt1, tol): +def _intersect(C, n, weights, IC, pt0, pt1, tol): """ Find the intersection points of a half-closed simplex (pt0, pt1] and IC constraints. @@ -368,8 +383,9 @@ def intersect(C, n, weights, IC, pt0, pt1, tol): return n + @njit -def update_u(u, W): +def _update_u(u, W): """ Update the threat points if it not feasible in the new W, by the minimum of new feasible payoffs. From d1b803214259c35039152dc51a32b7bbba549eac Mon Sep 17 00:00:00 2001 From: Zejin Shi Date: Sun, 18 Nov 2018 19:50:12 -0700 Subject: [PATCH 11/14] Use `equilibrium_payoffs` interface. --- quantecon/game_theory/repeated_game.py | 195 +++++++++++------- .../game_theory/tests/test_repeated_game.py | 10 +- 2 files changed, 125 insertions(+), 80 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 4fdba4253..f8f209234 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -27,97 +27,140 @@ def __init__(self, stage_game, delta): self.N = stage_game.N self.nums_actions = stage_game.nums_actions - def AS(self, tol=1e-12, max_iter=500, u_init=np.zeros(2)): + def equilibrium_payoffs(self, method=None, options=None): """ - Using AS algorithm to compute the set of payoff pairs of all - pure-strategy subgame-perfect equilibria with public randomization - for any repeated two-player games with perfect monitoring and - discounting, following Abreu and Sannikov (2014). + Compute the set of payoff pairs of all pure-strategy subgame-perfect + equilibria with public randomization for any repeated two-player games + with perfect monitoring and discounting. Parameters ---------- - rpd : RepeatedGame - Two player repeated game. + method : str, optional(default='abreu_sannikov') + The method for solving the equilibrium payoff set. + + options : dict, optional(default={}) + A dictionary of method options. For example, 'abreu_sannikov' + method accepts the following options: + + tol : scalar(float) + Tolerance for convergence checking. + max_iter : scalar(int) + Maximum number of iterations. + u_init : ndarray(float, ndim=1) + The initial guess of threat points. + + Notes + ----- + Here lists all the implemented methods. The default method + is 'abreu_sannikov'. + + 1. 'abreu_sannikov' + """ + if method is None: + method = 'abreu_sannikov' - tol : scalar(float), optional(default=1e-12) - Tolerance for convergence checking. + if options is None: + options = {} - max_iter : scalar(int), optional(default=500) - Maximum number of iterations. + if method in ('abreu_sannikov', 'AS'): + return _equilibrium_payoffs_abreu_sannikov(self, **options) + else: + msg = f"method {method} not supported." + raise NotImplementedError(msg) - u_init : ndarray(float, ndim=1), optional(default=np.zeros(2)) - The initial guess of threat points. - Returns - ------- - hull : scipy.spatial.ConvexHull - The convex hull of equilibrium payoff pairs. +def _equilibrium_payoffs_abreu_sannikov(rpg, tol=1e-12, max_iter=500, + u_init=np.zeros(2)): + """ + Using 'abreu_sannikov' algorithm to compute the set of payoff pairs + of all pure-strategy subgame-perfect equilibria with public randomization + for any repeated two-player games with perfect monitoring and + discounting, following Abreu and Sannikov (2014). - References - ---------- - .. [1] Abreu, Dilip, and Yuliy Sannikov. "An algorithm for - two‐player repeated games with perfect monitoring." Theoretical - Economics 9.2 (2014): 313-338. - """ - sg, delta = self.sg, self.delta + Parameters + ---------- + rpg : RepeatedGame + Two player repeated game. - if sg.N != 2: - msg = "this algorithm only applies to repeated two-player games." - raise NotImplementedError(msg) + tol : scalar(float), optional(default=1e-12) + Tolerance for convergence checking. - best_dev_gains = _best_dev_gains(sg, delta) - C = np.empty((4, 2)) - IC = np.empty(2) - action_profile_payoff = np.empty(2) - # auxiliary array for checking if payoff is inside the convex hull - # first two entries for payoff point, and the last entry is 1. - extended_payoff = np.ones(3) - # array to store new points of C in each intersection - # at most 4 new points will be generated - new_pts = np.empty((4, 2)) - # array to store the points of W - # the length of v is limited by |A1|*|A2|*4 - W_new = np.empty((np.prod(sg.nums_actions)*4, 2)) - W_old = np.empty((np.prod(sg.nums_actions)*4, 2)) - # count the new points generated in each iteration - n_new_pt = 0 - - # copy the threat points - u = np.copy(u_init) - - # initialization - payoff_pts = \ - sg.payoff_profile_array.reshape(np.prod(sg.nums_actions), 2) - W_new[:np.prod(sg.nums_actions)] = payoff_pts - n_new_pt = np.prod(sg.nums_actions) - - n_iter = 0 - while True: - W_old[:n_new_pt] = W_new[:n_new_pt] - n_old_pt = n_new_pt - hull = ConvexHull(W_old[:n_old_pt]) - - W_new, n_new_pt = \ - _R(delta, sg.nums_actions, sg.payoff_arrays, - best_dev_gains, hull.points, hull.vertices, - hull.equations, u, IC, action_profile_payoff, - extended_payoff, new_pts, W_new) - - n_iter += 1 - if n_iter >= max_iter: - break + max_iter : scalar(int), optional(default=500) + Maximum number of iterations. + + u_init : ndarray(float, ndim=1), optional(default=np.zeros(2)) + The initial guess of threat points. - # check convergence - if n_new_pt == n_old_pt: - if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol: - break + Returns + ------- + hull : scipy.spatial.ConvexHull + The convex hull of equilibrium payoff pairs. + + References + ---------- + .. [1] Abreu, Dilip, and Yuliy Sannikov. "An algorithm for + two‐player repeated games with perfect monitoring." Theoretical + Economics 9.2 (2014): 313-338. + """ + sg, delta = rpg.sg, rpg.delta + + if sg.N != 2: + msg = "this algorithm only applies to repeated two-player games." + raise NotImplementedError(msg) + + best_dev_gains = _best_dev_gains(sg, delta) + C = np.empty((4, 2)) + IC = np.empty(2) + action_profile_payoff = np.empty(2) + # auxiliary array for checking if payoff is inside the convex hull + # first two entries for payoff point, and the last entry is 1. + extended_payoff = np.ones(3) + # array to store new points of C in each intersection + # at most 4 new points will be generated + new_pts = np.empty((4, 2)) + # array to store the points of W + # the length of v is limited by |A1|*|A2|*4 + W_new = np.empty((np.prod(sg.nums_actions)*4, 2)) + W_old = np.empty((np.prod(sg.nums_actions)*4, 2)) + # count the new points generated in each iteration + n_new_pt = 0 + + # copy the threat points + u = np.copy(u_init) + + # initialization + payoff_pts = \ + sg.payoff_profile_array.reshape(np.prod(sg.nums_actions), 2) + W_new[:np.prod(sg.nums_actions)] = payoff_pts + n_new_pt = np.prod(sg.nums_actions) + + n_iter = 0 + while True: + W_old[:n_new_pt] = W_new[:n_new_pt] + n_old_pt = n_new_pt + hull = ConvexHull(W_old[:n_old_pt]) + + W_new, n_new_pt = \ + _R(delta, sg.nums_actions, sg.payoff_arrays, + best_dev_gains, hull.points, hull.vertices, + hull.equations, u, IC, action_profile_payoff, + extended_payoff, new_pts, W_new) + + n_iter += 1 + if n_iter >= max_iter: + break + + # check convergence + if n_new_pt == n_old_pt: + if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol: + break - # update threat points - _update_u(u, W_new[:n_new_pt]) + # update threat points + _update_u(u, W_new[:n_new_pt]) - hull = ConvexHull(W_new[:n_new_pt]) + hull = ConvexHull(W_new[:n_new_pt]) - return hull + return hull def _best_dev_gains(sg, delta): diff --git a/quantecon/game_theory/tests/test_repeated_game.py b/quantecon/game_theory/tests/test_repeated_game.py index d0572a7fc..34abef8d0 100644 --- a/quantecon/game_theory/tests/test_repeated_game.py +++ b/quantecon/game_theory/tests/test_repeated_game.py @@ -6,6 +6,7 @@ from numpy.testing import assert_allclose from quantecon.game_theory import Player, NormalFormGame, RepeatedGame + class TestAS(): def setUp(self): self.game_dicts = [] @@ -29,7 +30,7 @@ def setUp(self): # Prisoner's dilemma bimatrix = [[(9, 9), (1, 10)], [(10, 1), (3, 3)]] - vertices = np.array([[3. , 3. ], + vertices = np.array([[3. , 3.], [9.75, 3. ], [9. , 9. ], [3. , 9.75]]) @@ -39,11 +40,12 @@ def setUp(self): 'u': np.array([3., 3.])} self.game_dicts.append(d) - def test_AS(self): + def test_abreu_sannikov(self): for d in self.game_dicts: rpg = RepeatedGame(d['sg'], d['delta']) - hull = rpg.AS(u_init=d['u']) - assert_allclose(hull.points[hull.vertices], d['vertices']) + for method in ('abreu_sannikov', 'AS'): + hull = rpg.equilibrium_payoffs(options={'u_init': d['u']}) + assert_allclose(hull.points[hull.vertices], d['vertices']) if __name__ == '__main__': import sys From 95d6549c0529d0872f1d4758b932946b5d1b15c2 Mon Sep 17 00:00:00 2001 From: Zejin Shi <> Date: Sun, 18 Nov 2018 21:21:11 -0700 Subject: [PATCH 12/14] FIX: minor modification of docstrings. --- quantecon/game_theory/repeated_game.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index f8f209234..69445e973 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -35,10 +35,10 @@ def equilibrium_payoffs(self, method=None, options=None): Parameters ---------- - method : str, optional(default='abreu_sannikov') + method : str, optional The method for solving the equilibrium payoff set. - options : dict, optional(default={}) + options : dict, optional A dictionary of method options. For example, 'abreu_sannikov' method accepts the following options: From e32c21d97c4cfc7d125d444cf89fb771fdd1102e Mon Sep 17 00:00:00 2001 From: Zejin Shi <> Date: Thu, 22 Nov 2018 00:13:48 -0700 Subject: [PATCH 13/14] Minor modifications of docstrings. --- quantecon/game_theory/repeated_game.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 69445e973..919e26cf6 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -5,7 +5,7 @@ import numpy as np from scipy.spatial import ConvexHull -from numba import jit, njit +from numba import njit class RepeatedGame: @@ -15,11 +15,24 @@ class RepeatedGame: Parameters ---------- stage_game : NormalFormGame - The stage game used to create the repeated game. + The stage game used to create the repeated game. delta : scalar(float) - The common discount rate at which all players discount the future. + The common discount rate at which all players discount the future. + Attributes + ---------- + sg : NormalFormGame + The stage game. See Parameters. + + delta : scalar(float) + See Parameters. + + N : scalar(int) + The number of players. + + nums_actions : tuple(int) + Tuple of the numbers of actions, one for each player. """ def __init__(self, stage_game, delta): self.sg = stage_game From 070e9008524c599855e0213afe0fb24756a6025e Mon Sep 17 00:00:00 2001 From: Zejin Shi <> Date: Thu, 22 Nov 2018 00:25:30 -0700 Subject: [PATCH 14/14] Modification of `_best_dev_gains`. --- quantecon/game_theory/repeated_game.py | 36 ++++++++++---------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/quantecon/game_theory/repeated_game.py b/quantecon/game_theory/repeated_game.py index 919e26cf6..7b94d5017 100644 --- a/quantecon/game_theory/repeated_game.py +++ b/quantecon/game_theory/repeated_game.py @@ -121,7 +121,7 @@ def _equilibrium_payoffs_abreu_sannikov(rpg, tol=1e-12, max_iter=500, msg = "this algorithm only applies to repeated two-player games." raise NotImplementedError(msg) - best_dev_gains = _best_dev_gains(sg, delta) + best_dev_gains = _best_dev_gains(rpg) C = np.empty((4, 2)) IC = np.empty(2) action_profile_payoff = np.empty(2) @@ -176,39 +176,31 @@ def _equilibrium_payoffs_abreu_sannikov(rpg, tol=1e-12, max_iter=500, return hull -def _best_dev_gains(sg, delta): +def _best_dev_gains(rpg): """ Calculate the normalized payoff gains from deviating from the current action to the best response for each player. Parameters ---------- - sg : NormalFormGame - The stage game. - - delta : scalar(float) - The common discount rate at which all players discount the future. + rpg : RepeatedGame + Two player repeated game. Returns ------- - best_dev_gains0 : ndarray(float, ndim=2) - The normalized best deviation payoff gain arrays. - best_dev_gains[0][a0, a1] is normalized payoff gain - player 0 can get if originally players are choosing - a0 and a1, and player 0 deviates to the best response action. - - best_dev_gains1 : ndarray(float, ndim=2) + best_dev_gains : tuple(ndarray(float, ndim=2)) The normalized best deviation payoff gain arrays. - best_dev_gains[1][a1, a0] is normalized payoff gain - player 1 can get if originally players are choosing - a1 and a0, and player 1 deviates to the best response action. + best_dev_gains[i][ai, a-i] is normalized payoff gain + player i can get if originally players are choosing + ai and a-i, and player i deviates to the best response action. """ - best_dev_gains0 = (1-delta)/delta * \ - (np.max(sg.payoff_arrays[0], 0) - sg.payoff_arrays[0]) - best_dev_gains1 = (1-delta)/delta * \ - (np.max(sg.payoff_arrays[1], 0) - sg.payoff_arrays[1]) + sg, delta = rpg.sg, rpg.delta + + best_dev_gains = ((1-delta)/delta * + (np.max(sg.payoff_arrays[i], 0) - sg.payoff_arrays[i]) + for i in range(2)) - return best_dev_gains0, best_dev_gains1 + return tuple(best_dev_gains) @njit