From 3bdc97e4b094d4467219eb726fba5fd59c756db4 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Fri, 18 Mar 2016 23:59:52 +0900 Subject: [PATCH 1/9] DiscreteDP: Allow beta=1 --- quantecon/markov/ddp.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index 54d9ec979..4f4efb6c1 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -109,6 +109,7 @@ """ from __future__ import division +import warnings import numpy as np import scipy.sparse as sp @@ -165,7 +166,7 @@ class DiscreteDP(object): Transition probability array. beta : scalar(float) - Discount factor. Must be 0 <= beta < 1. + Discount factor. Must be in [0, 1]. s_indices : array_like(int, ndim=1), optional(default=None) Array containing the indices of the states. @@ -190,6 +191,13 @@ class DiscreteDP(object): max_iter : scalar(int), default=250 Default value for the maximum number of iterations. + Notes + ----- + DiscreteDP accepts beta=1 for convenience. In this case, infinite + horizon solution methods are disabled, and the instance is then seen + as merely an object carrying the Bellman operator, which may be used + for backward induction for finite horizon problems. + Examples -------- Consider the following example, taken from Puterman (2005), Section @@ -401,8 +409,12 @@ def s_wise_max(vals, out=None, out_argmax=None): # Check that for every state, at least one action is feasible self._check_action_feasibility() - if not (0 <= beta < 1): - raise ValueError('beta must be in [0, 1)') + if not (0 <= beta <= 1): + raise ValueError('beta must be in [0, 1]') + if beta == 1: + msg = 'infinite horizon solution methods are disabled with beta=1' + warnings.warn(msg) + self._error_msg_no_discounting = 'method invalid for beta=1' self.beta = beta self.epsilon = 1e-3 @@ -561,6 +573,9 @@ def evaluate_policy(self, sigma): Value vector of `sigma`, of length n. """ + if self.beta == 1: + raise NotImplementedError(self._error_msg_no_discounting) + # Solve (I - beta * Q_sigma) v = R_sigma for v R_sigma, Q_sigma = self.RQ_sigma(sigma) b = R_sigma @@ -678,6 +693,9 @@ def value_iteration(self, v_init=None, epsilon=None, max_iter=None): `solve` method. """ + if self.beta == 1: + raise NotImplementedError(self._error_msg_no_discounting) + if max_iter is None: max_iter = self.max_iter if epsilon is None: @@ -718,6 +736,9 @@ def policy_iteration(self, v_init=None, max_iter=None): `solve` method. """ + if self.beta == 1: + raise NotImplementedError(self._error_msg_no_discounting) + if max_iter is None: max_iter = self.max_iter @@ -755,6 +776,9 @@ def modified_policy_iteration(self, v_init=None, epsilon=None, the `solve` method. """ + if self.beta == 1: + raise NotImplementedError(self._error_msg_no_discounting) + if max_iter is None: max_iter = self.max_iter if epsilon is None: From 25c7d7c92c1cf7999858e735b28c18439c532b13 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Sat, 19 Mar 2016 20:35:47 +0900 Subject: [PATCH 2/9] DiscreteDP: Add test_ddp_beta_1_not_implemented_error --- quantecon/markov/tests/test_ddp.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py index ed744238f..31e181090 100644 --- a/quantecon/markov/tests/test_ddp.py +++ b/quantecon/markov/tests/test_ddp.py @@ -213,6 +213,30 @@ def test_ddp_no_feasibile_action_error(): assert_raises(ValueError, DiscreteDP, R, Q, beta, s_indices, a_indices) +def test_ddp_beta_1_not_implemented_error(): + n, m = 3, 2 + R = np.array([[0, 1], [1, 0], [0, 1]]) + Q = np.empty((n, m, n)) + Q[:] = 1/n + beta = 1 + + ddp0 = DiscreteDP(R, Q, beta) + s_indices, a_indices = np.where(R > -np.inf) + R_sa = R[s_indices, a_indices] + Q_sa = Q[s_indices, a_indices] + ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) + Q_sa_sp = sparse.csr_matrix(Q_sa) + ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices) + + solution_methods = \ + ['value_iteration', 'policy_iteration', 'modified_policy_iteration'] + + for ddp in [ddp0, ddp1, ddp2]: + assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n)) + for method in solution_methods: + assert_raises(NotImplementedError, getattr(ddp, method)) + + if __name__ == '__main__': import sys import nose From b18ab8f700d23931aca2db4ab878beb6b2ea7ba3 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Sat, 19 Mar 2016 20:36:55 +0900 Subject: [PATCH 3/9] DiscreteDP: Remove util.numba_installed --- quantecon/markov/ddp.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index 4f4efb6c1..c54f7a63c 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -112,9 +112,9 @@ import warnings import numpy as np import scipy.sparse as sp +from numba import jit from .core import MarkovChain -from ..util import numba_installed, jit class DiscreteDP(object): @@ -897,6 +897,7 @@ def __dir__(self): return self.keys() +@jit(nopython=True) def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax): n = len(out_max) for i in range(n): @@ -908,10 +909,8 @@ def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax): out_max[i] = vals[m] out_argmax[i] = a_indices[m] -if numba_installed: - _s_wise_max_argmax = jit(nopython=True)(_s_wise_max_argmax) - +@jit(nopython=True) def _s_wise_max(a_indices, a_indptr, vals, out_max): n = len(out_max) for i in range(n): @@ -922,10 +921,8 @@ def _s_wise_max(a_indices, a_indptr, vals, out_max): m = j out_max[i] = vals[m] -if numba_installed: - _s_wise_max = jit(nopython=True)(_s_wise_max) - +@jit(nopython=True) def _find_indices(a_indices, a_indptr, sigma, out): n = len(sigma) for i in range(n): @@ -933,9 +930,6 @@ def _find_indices(a_indices, a_indptr, sigma, out): if sigma[i] == a_indices[j]: out[i] = j -if numba_installed: - _find_indices = jit(nopython=True)(_find_indices) - @jit(nopython=True) def _has_sorted_sa_indices(s_indices, a_indices): From 7432f316647a4049b2a6dbf83a618cb069d070d1 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Mon, 28 Mar 2016 23:31:14 +0900 Subject: [PATCH 4/9] ddp: Add backward_induction --- quantecon/markov/__init__.py | 2 +- quantecon/markov/ddp.py | 66 ++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/quantecon/markov/__init__.py b/quantecon/markov/__init__.py index 26cdb080e..a063d2e81 100644 --- a/quantecon/markov/__init__.py +++ b/quantecon/markov/__init__.py @@ -8,4 +8,4 @@ from .random import random_markov_chain, random_stochastic_matrix, \ random_discrete_dp from .approximation import tauchen -from .ddp import DiscreteDP +from .ddp import DiscreteDP, backward_induction diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index c54f7a63c..c6cd01d21 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -897,6 +897,72 @@ def __dir__(self): return self.keys() +def backward_induction(ddp, T, v_T=None): + r""" + Solve by backward induction a :math:`T`-period finite horizon + discrete dynamic program with stationary reward and transition + probability functions :math:`r` and :math:`q` and discount factor + :math:`\beta \in [0, 1]`. + + The optimal value functions :math:`v^*_0, \ldots, v^*_T` and policy + functions :math:`\sigma^*_0, \ldots, \sigma^*_{T-1}` are obtained by + :math:`v^*_T = v_T`, and + + .. math:: + + v^*_{t-1}(s) = \max_{a \in A(s)} r(s, a) + + \beta \sum_{s' \in S} q(s'|s, a) v^*_t(s') + \quad (s \in S) + + and + + .. math:: + + \sigma^*_{t-1}(s) \in \operatorname*{arg\,max}_{a \in A(s)} + r(s, a) + \beta \sum_{s' \in S} q(s'|s, a) v^*_t(s') + \quad (s \in S) + + for :math:`t = T, \ldots, 1`, where the terminal value function + :math:`v_T` is exogenously given. + + Parameters + ---------- + ddp : DiscreteDP + DiscreteDP instance storing reward array `R`, transition + probability array `Q`, and discount factor `beta`. + + T : scalar(int) + Number of decision periods. + + v_T : array_like(float, ndim=1), optional(default=None) + Terminal value function, of length equal to n (the number of + states). If None, it defaults to the vector of zeros. + + Returns + ------- + vs : ndarray(float, ndim=2) + Array of shape (T+1, n) where `vs[t]` contains the optimal + value function at period `t = 0, ..., T`. + + sigmas : ndarray(float, ndim=2) + Array of shape (T, n) where `sigmas[t]` contains the optimal + policy function at period `t = 0, ..., T-1`. + + """ + n = ddp.num_states + vs = np.empty((T+1, n)) + sigmas = np.empty((T, n), dtype=int) + + if v_T is None: + v_T = np.zeros(n) + vs[T] = v_T + + for t in range(T, 0, -1): + ddp.bellman_operator(vs[t], Tv=vs[t-1], sigma=sigmas[t-1]) + + return vs, sigmas + + @jit(nopython=True) def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax): n = len(out_max) From a84214dbe2e82d7ef9cc5be7543b7fc84ce8894f Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Tue, 29 Mar 2016 22:35:38 +0900 Subject: [PATCH 5/9] ddp: Add test_backward_induction --- quantecon/markov/tests/test_ddp.py | 39 +++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py index 31e181090..3a62e43a1 100644 --- a/quantecon/markov/tests/test_ddp.py +++ b/quantecon/markov/tests/test_ddp.py @@ -12,7 +12,7 @@ from numpy.testing import assert_array_equal, assert_allclose, assert_raises from nose.tools import eq_, ok_ -from quantecon.markov import DiscreteDP +from quantecon.markov import DiscreteDP, backward_induction class TestDiscreteDP: @@ -177,6 +177,43 @@ def test_ddp_sorting(): assert_array_equal(ddp_Q, Q) +class TestFiniteHorizon: + def setUp(self): + # From Puterman 2005, Section 3.2, Section 4.6.1 + # "single-product stochastic inventory control" + s_indices = [0, 0, 0, 0, 1, 1, 1, 2, 2, 3] + a_indices = [0, 1, 2, 3, 0, 1, 2, 0, 1, 0] + R = [ 0., -1., -2., -5., 5., 0., -3., 6., -1., 5.] + Q = [[ 1. , 0. , 0. , 0. ], + [ 0.75, 0.25, 0. , 0. ], + [ 0.25, 0.5 , 0.25, 0. ], + [ 0. , 0.25, 0.5 , 0.25], + [ 0.75, 0.25, 0. , 0. ], + [ 0.25, 0.5 , 0.25, 0. ], + [ 0. , 0.25, 0.5 , 0.25], + [ 0.25, 0.5 , 0.25, 0. ], + [ 0. , 0.25, 0.5 , 0.25], + [ 0. , 0.25, 0.5 , 0.25]] + beta = 1 + self.ddp = DiscreteDP(R, Q, beta, s_indices, a_indices) + + def test_backward_induction(self): + T = 3 + # v_T = np.zeors(self.ddp.n) + vs_expected = [[67/16, 129/16, 194/16, 227/16], + [2, 25/4, 10, 21/2], + [0, 5, 6, 5], + [0, 0, 0, 0]] + sigmas_expected = [[3, 0, 0, 0], + [2, 0, 0, 0], + [0, 0, 0, 0]] + + vs, sigmas = backward_induction(self.ddp, T) + + assert_allclose(vs, vs_expected) + assert_array_equal(sigmas, sigmas_expected) + + def test_ddp_negative_inf_error(): n, m = 3, 2 R = np.array([[0, 1], [0, -np.inf], [-np.inf, -np.inf]]) From a009367fc0a9c02e8a63986f176987f67b5dc1db Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Tue, 29 Mar 2016 22:37:38 +0900 Subject: [PATCH 6/9] Minor edit in docstring --- quantecon/markov/ddp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index c6cd01d21..37830a122 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -40,8 +40,8 @@ The *optimal value function* :math:`v^*` is the function such that :math:`v^*(s) = \max_{\sigma} v_{\sigma}(s)` for all :math:`s \in S`. A -policy function :math:`\sigma^*` is *optimal* if :math:`v_{\sigma}(s) = -v(s)` for all :math:`s \in S`. +policy function :math:`\sigma^*` is *optimal* if :math:`v_{\sigma^*}(s) += v^*(s)` for all :math:`s \in S`. The *Bellman equation* is written as From d22bd41636530d9e056dd5ca435152ebb21bbce5 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Sun, 3 Apr 2016 21:35:02 +0900 Subject: [PATCH 7/9] Minor fix in docstring [ci skip] --- quantecon/markov/ddp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index 37830a122..afe478d9f 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -944,7 +944,7 @@ def backward_induction(ddp, T, v_T=None): Array of shape (T+1, n) where `vs[t]` contains the optimal value function at period `t = 0, ..., T`. - sigmas : ndarray(float, ndim=2) + sigmas : ndarray(int, ndim=2) Array of shape (T, n) where `sigmas[t]` contains the optimal policy function at period `t = 0, ..., T-1`. From 7771582fd606498904483584082899dcd500abf0 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Wed, 6 Apr 2016 23:50:36 +0900 Subject: [PATCH 8/9] Minor edit: v[t] -> v[t, :] etc just to be explicit --- quantecon/markov/ddp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index afe478d9f..d6c9380ea 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -955,10 +955,10 @@ def backward_induction(ddp, T, v_T=None): if v_T is None: v_T = np.zeros(n) - vs[T] = v_T + vs[T, :] = v_T for t in range(T, 0, -1): - ddp.bellman_operator(vs[t], Tv=vs[t-1], sigma=sigmas[t-1]) + ddp.bellman_operator(vs[t, :], Tv=vs[t-1, :], sigma=sigmas[t-1, :]) return vs, sigmas From 6093186caa6007251c1226d184068509adcc0cd2 Mon Sep 17 00:00:00 2001 From: Daisuke Oyama Date: Wed, 6 Apr 2016 23:59:59 +0900 Subject: [PATCH 9/9] Terminal value option: v_T -> v_term --- quantecon/markov/ddp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index d6c9380ea..aaa638d7c 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -897,7 +897,7 @@ def __dir__(self): return self.keys() -def backward_induction(ddp, T, v_T=None): +def backward_induction(ddp, T, v_term=None): r""" Solve by backward induction a :math:`T`-period finite horizon discrete dynamic program with stationary reward and transition @@ -934,7 +934,7 @@ def backward_induction(ddp, T, v_T=None): T : scalar(int) Number of decision periods. - v_T : array_like(float, ndim=1), optional(default=None) + v_term : array_like(float, ndim=1), optional(default=None) Terminal value function, of length equal to n (the number of states). If None, it defaults to the vector of zeros. @@ -953,9 +953,9 @@ def backward_induction(ddp, T, v_T=None): vs = np.empty((T+1, n)) sigmas = np.empty((T, n), dtype=int) - if v_T is None: - v_T = np.zeros(n) - vs[T, :] = v_T + if v_term is None: + v_term = np.zeros(n) + vs[T, :] = v_term for t in range(T, 0, -1): ddp.bellman_operator(vs[t, :], Tv=vs[t-1, :], sigma=sigmas[t-1, :])