From 3bdc97e4b094d4467219eb726fba5fd59c756db4 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Fri, 18 Mar 2016 23:59:52 +0900
Subject: [PATCH 1/9] DiscreteDP: Allow beta=1

---
 quantecon/markov/ddp.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index 54d9ec979..4f4efb6c1 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -109,6 +109,7 @@
 
 """
 from __future__ import division
+import warnings
 import numpy as np
 import scipy.sparse as sp
 
@@ -165,7 +166,7 @@ class DiscreteDP(object):
         Transition probability array.
 
     beta : scalar(float)
-        Discount factor. Must be 0 <= beta < 1.
+        Discount factor. Must be in [0, 1].
 
     s_indices : array_like(int, ndim=1), optional(default=None)
         Array containing the indices of the states.
@@ -190,6 +191,13 @@ class DiscreteDP(object):
     max_iter : scalar(int), default=250
         Default value for the maximum number of iterations.
 
+    Notes
+    -----
+    DiscreteDP accepts beta=1 for convenience. In this case, infinite
+    horizon solution methods are disabled, and the instance is then seen
+    as merely an object carrying the Bellman operator, which may be used
+    for backward induction for finite horizon problems.
+
     Examples
     --------
     Consider the following example, taken from Puterman (2005), Section
@@ -401,8 +409,12 @@ def s_wise_max(vals, out=None, out_argmax=None):
         # Check that for every state, at least one action is feasible
         self._check_action_feasibility()
 
-        if not (0 <= beta < 1):
-            raise ValueError('beta must be in [0, 1)')
+        if not (0 <= beta <= 1):
+            raise ValueError('beta must be in [0, 1]')
+        if beta == 1:
+            msg = 'infinite horizon solution methods are disabled with beta=1'
+            warnings.warn(msg)
+            self._error_msg_no_discounting = 'method invalid for beta=1'
         self.beta = beta
 
         self.epsilon = 1e-3
@@ -561,6 +573,9 @@ def evaluate_policy(self, sigma):
             Value vector of `sigma`, of length n.
 
         """
+        if self.beta == 1:
+            raise NotImplementedError(self._error_msg_no_discounting)
+
         # Solve (I - beta * Q_sigma) v = R_sigma for v
         R_sigma, Q_sigma = self.RQ_sigma(sigma)
         b = R_sigma
@@ -678,6 +693,9 @@ def value_iteration(self, v_init=None, epsilon=None, max_iter=None):
         `solve` method.
 
         """
+        if self.beta == 1:
+            raise NotImplementedError(self._error_msg_no_discounting)
+
         if max_iter is None:
             max_iter = self.max_iter
         if epsilon is None:
@@ -718,6 +736,9 @@ def policy_iteration(self, v_init=None, max_iter=None):
         `solve` method.
 
         """
+        if self.beta == 1:
+            raise NotImplementedError(self._error_msg_no_discounting)
+
         if max_iter is None:
             max_iter = self.max_iter
 
@@ -755,6 +776,9 @@ def modified_policy_iteration(self, v_init=None, epsilon=None,
         the `solve` method.
 
         """
+        if self.beta == 1:
+            raise NotImplementedError(self._error_msg_no_discounting)
+
         if max_iter is None:
             max_iter = self.max_iter
         if epsilon is None:

From 25c7d7c92c1cf7999858e735b28c18439c532b13 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Sat, 19 Mar 2016 20:35:47 +0900
Subject: [PATCH 2/9] DiscreteDP: Add test_ddp_beta_1_not_implemented_error

---
 quantecon/markov/tests/test_ddp.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py
index ed744238f..31e181090 100644
--- a/quantecon/markov/tests/test_ddp.py
+++ b/quantecon/markov/tests/test_ddp.py
@@ -213,6 +213,30 @@ def test_ddp_no_feasibile_action_error():
     assert_raises(ValueError, DiscreteDP, R, Q, beta, s_indices, a_indices)
 
 
+def test_ddp_beta_1_not_implemented_error():
+    n, m = 3, 2
+    R = np.array([[0, 1], [1, 0], [0, 1]])
+    Q = np.empty((n, m, n))
+    Q[:] = 1/n
+    beta = 1
+
+    ddp0 = DiscreteDP(R, Q, beta)
+    s_indices, a_indices = np.where(R > -np.inf)
+    R_sa = R[s_indices, a_indices]
+    Q_sa = Q[s_indices, a_indices]
+    ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
+    Q_sa_sp = sparse.csr_matrix(Q_sa)
+    ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
+
+    solution_methods = \
+        ['value_iteration', 'policy_iteration', 'modified_policy_iteration']
+
+    for ddp in [ddp0, ddp1, ddp2]:
+        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
+        for method in solution_methods:
+            assert_raises(NotImplementedError, getattr(ddp, method))
+
+
 if __name__ == '__main__':
     import sys
     import nose

From b18ab8f700d23931aca2db4ab878beb6b2ea7ba3 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Sat, 19 Mar 2016 20:36:55 +0900
Subject: [PATCH 3/9] DiscreteDP: Remove util.numba_installed

---
 quantecon/markov/ddp.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index 4f4efb6c1..c54f7a63c 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -112,9 +112,9 @@
 import warnings
 import numpy as np
 import scipy.sparse as sp
+from numba import jit
 
 from .core import MarkovChain
-from ..util import numba_installed, jit
 
 
 class DiscreteDP(object):
@@ -897,6 +897,7 @@ def __dir__(self):
         return self.keys()
 
 
+@jit(nopython=True)
 def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
     n = len(out_max)
     for i in range(n):
@@ -908,10 +909,8 @@ def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
             out_max[i] = vals[m]
             out_argmax[i] = a_indices[m]
 
-if numba_installed:
-    _s_wise_max_argmax = jit(nopython=True)(_s_wise_max_argmax)
-
 
+@jit(nopython=True)
 def _s_wise_max(a_indices, a_indptr, vals, out_max):
     n = len(out_max)
     for i in range(n):
@@ -922,10 +921,8 @@ def _s_wise_max(a_indices, a_indptr, vals, out_max):
                     m = j
             out_max[i] = vals[m]
 
-if numba_installed:
-    _s_wise_max = jit(nopython=True)(_s_wise_max)
-
 
+@jit(nopython=True)
 def _find_indices(a_indices, a_indptr, sigma, out):
     n = len(sigma)
     for i in range(n):
@@ -933,9 +930,6 @@ def _find_indices(a_indices, a_indptr, sigma, out):
             if sigma[i] == a_indices[j]:
                 out[i] = j
 
-if numba_installed:
-    _find_indices = jit(nopython=True)(_find_indices)
-
 
 @jit(nopython=True)
 def _has_sorted_sa_indices(s_indices, a_indices):

From 7432f316647a4049b2a6dbf83a618cb069d070d1 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Mon, 28 Mar 2016 23:31:14 +0900
Subject: [PATCH 4/9] ddp: Add backward_induction

---
 quantecon/markov/__init__.py |  2 +-
 quantecon/markov/ddp.py      | 66 ++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/quantecon/markov/__init__.py b/quantecon/markov/__init__.py
index 26cdb080e..a063d2e81 100644
--- a/quantecon/markov/__init__.py
+++ b/quantecon/markov/__init__.py
@@ -8,4 +8,4 @@
 from .random import random_markov_chain, random_stochastic_matrix, \
     random_discrete_dp
 from .approximation import tauchen
-from .ddp import DiscreteDP
+from .ddp import DiscreteDP, backward_induction
diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index c54f7a63c..c6cd01d21 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -897,6 +897,72 @@ def __dir__(self):
         return self.keys()
 
 
+def backward_induction(ddp, T, v_T=None):
+    r"""
+    Solve by backward induction a :math:`T`-period finite horizon
+    discrete dynamic program with stationary reward and transition
+    probability functions :math:`r` and :math:`q` and discount factor
+    :math:`\beta \in [0, 1]`.
+
+    The optimal value functions :math:`v^*_0, \ldots, v^*_T` and policy
+    functions :math:`\sigma^*_0, \ldots, \sigma^*_{T-1}` are obtained by
+    :math:`v^*_T = v_T`, and
+
+    .. math::
+
+        v^*_{t-1}(s) = \max_{a \in A(s)} r(s, a) +
+            \beta \sum_{s' \in S} q(s'|s, a) v^*_t(s')
+            \quad (s \in S)
+
+    and
+
+    .. math::
+
+        \sigma^*_{t-1}(s) \in \operatorname*{arg\,max}_{a \in A(s)}
+            r(s, a) + \beta \sum_{s' \in S} q(s'|s, a) v^*_t(s')
+            \quad (s \in S)
+
+    for :math:`t = T, \ldots, 1`, where the terminal value function
+    :math:`v_T` is exogenously given.
+
+    Parameters
+    ----------
+    ddp : DiscreteDP
+        DiscreteDP instance storing reward array `R`, transition
+        probability array `Q`, and discount factor `beta`.
+
+    T : scalar(int)
+        Number of decision periods.
+
+    v_T : array_like(float, ndim=1), optional(default=None)
+        Terminal value function, of length equal to n (the number of
+        states). If None, it defaults to the vector of zeros.
+
+    Returns
+    -------
+    vs : ndarray(float, ndim=2)
+        Array of shape (T+1, n) where `vs[t]` contains the optimal
+        value function at period `t = 0, ..., T`.
+
+    sigmas : ndarray(float, ndim=2)
+        Array of shape (T, n) where `sigmas[t]` contains the optimal
+        policy function at period `t = 0, ..., T-1`.
+
+    """
+    n = ddp.num_states
+    vs = np.empty((T+1, n))
+    sigmas = np.empty((T, n), dtype=int)
+
+    if v_T is None:
+        v_T = np.zeros(n)
+    vs[T] = v_T
+
+    for t in range(T, 0, -1):
+        ddp.bellman_operator(vs[t], Tv=vs[t-1], sigma=sigmas[t-1])
+
+    return vs, sigmas
+
+
 @jit(nopython=True)
 def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
     n = len(out_max)

From a84214dbe2e82d7ef9cc5be7543b7fc84ce8894f Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Tue, 29 Mar 2016 22:35:38 +0900
Subject: [PATCH 5/9] ddp: Add test_backward_induction

---
 quantecon/markov/tests/test_ddp.py | 39 +++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py
index 31e181090..3a62e43a1 100644
--- a/quantecon/markov/tests/test_ddp.py
+++ b/quantecon/markov/tests/test_ddp.py
@@ -12,7 +12,7 @@
 from numpy.testing import assert_array_equal, assert_allclose, assert_raises
 from nose.tools import eq_, ok_
 
-from quantecon.markov import DiscreteDP
+from quantecon.markov import DiscreteDP, backward_induction
 
 
 class TestDiscreteDP:
@@ -177,6 +177,43 @@ def test_ddp_sorting():
         assert_array_equal(ddp_Q, Q)
 
 
+class TestFiniteHorizon:
+    def setUp(self):
+        # From Puterman 2005, Section 3.2, Section 4.6.1
+        # "single-product stochastic inventory control"
+        s_indices = [0, 0, 0, 0, 1, 1, 1, 2, 2, 3]
+        a_indices = [0, 1, 2, 3, 0, 1, 2, 0, 1, 0]
+        R = [ 0., -1., -2., -5.,  5.,  0., -3.,  6., -1.,  5.]
+        Q = [[ 1.  ,  0.  ,  0.  ,  0.  ],
+             [ 0.75,  0.25,  0.  ,  0.  ],
+             [ 0.25,  0.5 ,  0.25,  0.  ],
+             [ 0.  ,  0.25,  0.5 ,  0.25],
+             [ 0.75,  0.25,  0.  ,  0.  ],
+             [ 0.25,  0.5 ,  0.25,  0.  ],
+             [ 0.  ,  0.25,  0.5 ,  0.25],
+             [ 0.25,  0.5 ,  0.25,  0.  ],
+             [ 0.  ,  0.25,  0.5 ,  0.25],
+             [ 0.  ,  0.25,  0.5 ,  0.25]]
+        beta = 1
+        self.ddp = DiscreteDP(R, Q, beta, s_indices, a_indices)
+
+    def test_backward_induction(self):
+        T = 3
+        # v_T = np.zeors(self.ddp.n)
+        vs_expected = [[67/16, 129/16, 194/16, 227/16],
+                       [2, 25/4, 10, 21/2],
+                       [0, 5, 6, 5],
+                       [0, 0, 0, 0]]
+        sigmas_expected = [[3, 0, 0, 0],
+                           [2, 0, 0, 0],
+                           [0, 0, 0, 0]]
+
+        vs, sigmas = backward_induction(self.ddp, T)
+
+        assert_allclose(vs, vs_expected)
+        assert_array_equal(sigmas, sigmas_expected)
+
+
 def test_ddp_negative_inf_error():
     n, m = 3, 2
     R = np.array([[0, 1], [0, -np.inf], [-np.inf, -np.inf]])

From a009367fc0a9c02e8a63986f176987f67b5dc1db Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Tue, 29 Mar 2016 22:37:38 +0900
Subject: [PATCH 6/9] Minor edit in docstring

---
 quantecon/markov/ddp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index c6cd01d21..37830a122 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -40,8 +40,8 @@
 
 The *optimal value function* :math:`v^*` is the function such that
 :math:`v^*(s) = \max_{\sigma} v_{\sigma}(s)` for all :math:`s \in S`. A
-policy function :math:`\sigma^*` is *optimal* if :math:`v_{\sigma}(s) =
-v(s)` for all :math:`s \in S`.
+policy function :math:`\sigma^*` is *optimal* if :math:`v_{\sigma^*}(s)
+= v^*(s)` for all :math:`s \in S`.
 
 The *Bellman equation* is written as
 

From d22bd41636530d9e056dd5ca435152ebb21bbce5 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Sun, 3 Apr 2016 21:35:02 +0900
Subject: [PATCH 7/9] Minor fix in docstring [ci skip]

---
 quantecon/markov/ddp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index 37830a122..afe478d9f 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -944,7 +944,7 @@ def backward_induction(ddp, T, v_T=None):
         Array of shape (T+1, n) where `vs[t]` contains the optimal
         value function at period `t = 0, ..., T`.
 
-    sigmas : ndarray(float, ndim=2)
+    sigmas : ndarray(int, ndim=2)
         Array of shape (T, n) where `sigmas[t]` contains the optimal
         policy function at period `t = 0, ..., T-1`.
 

From 7771582fd606498904483584082899dcd500abf0 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Wed, 6 Apr 2016 23:50:36 +0900
Subject: [PATCH 8/9] Minor edit: v[t] -> v[t, :] etc

just to be explicit
---
 quantecon/markov/ddp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index afe478d9f..d6c9380ea 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -955,10 +955,10 @@ def backward_induction(ddp, T, v_T=None):
 
     if v_T is None:
         v_T = np.zeros(n)
-    vs[T] = v_T
+    vs[T, :] = v_T
 
     for t in range(T, 0, -1):
-        ddp.bellman_operator(vs[t], Tv=vs[t-1], sigma=sigmas[t-1])
+        ddp.bellman_operator(vs[t, :], Tv=vs[t-1, :], sigma=sigmas[t-1, :])
 
     return vs, sigmas
 

From 6093186caa6007251c1226d184068509adcc0cd2 Mon Sep 17 00:00:00 2001
From: Daisuke Oyama <oyama@e.u-tokyo.ac.jp>
Date: Wed, 6 Apr 2016 23:59:59 +0900
Subject: [PATCH 9/9] Terminal value option: v_T -> v_term

---
 quantecon/markov/ddp.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
index d6c9380ea..aaa638d7c 100644
--- a/quantecon/markov/ddp.py
+++ b/quantecon/markov/ddp.py
@@ -897,7 +897,7 @@ def __dir__(self):
         return self.keys()
 
 
-def backward_induction(ddp, T, v_T=None):
+def backward_induction(ddp, T, v_term=None):
     r"""
     Solve by backward induction a :math:`T`-period finite horizon
     discrete dynamic program with stationary reward and transition
@@ -934,7 +934,7 @@ def backward_induction(ddp, T, v_T=None):
     T : scalar(int)
         Number of decision periods.
 
-    v_T : array_like(float, ndim=1), optional(default=None)
+    v_term : array_like(float, ndim=1), optional(default=None)
         Terminal value function, of length equal to n (the number of
         states). If None, it defaults to the vector of zeros.
 
@@ -953,9 +953,9 @@ def backward_induction(ddp, T, v_T=None):
     vs = np.empty((T+1, n))
     sigmas = np.empty((T, n), dtype=int)
 
-    if v_T is None:
-        v_T = np.zeros(n)
-    vs[T, :] = v_T
+    if v_term is None:
+        v_term = np.zeros(n)
+    vs[T, :] = v_term
 
     for t in range(T, 0, -1):
         ddp.bellman_operator(vs[t, :], Tv=vs[t-1, :], sigma=sigmas[t-1, :])