Merged
12 changes: 12 additions & 0 deletions docs/source/refs.bib
@@ -207,3 +207,15 @@ @article{zou2005
journal = {Journal of the Royal Statistical Society Series B},
doi = {10.1111/j.1467-9868.2005.00527.x}
}

@article{Goldstein2014,
  author = {Goldstein, Tom and O'Donoghue, Brendan and Setzer, Simon and Baraniuk, Richard},
  year = {2014},
  month = {Jan},
  pages = {1588--1623},
  title = {Fast Alternating Direction Optimization Methods},
  journal = {SIAM Journal on Imaging Sciences},
  volume = {7},
  issn = {1936-4954},
  doi = {10/gdwr49},
}
3 changes: 2 additions & 1 deletion modopt/opt/algorithms/__init__.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
r"""OPTIMISATION ALGOTITHMS.
r"""OPTIMISATION ALGORITHMS.

This module contains class implementations of various optimisation algorithms.

@@ -57,3 +57,4 @@
SAGAOptGradOpt,
VanillaGenericGradOpt)
from modopt.opt.algorithms.primal_dual import Condat
from modopt.opt.algorithms.admm import ADMM, FastADMM
337 changes: 337 additions & 0 deletions modopt/opt/algorithms/admm.py
@@ -0,0 +1,337 @@
"""ADMM Algorithms."""
import numpy as np

from modopt.base.backend import get_array_module
from modopt.opt.algorithms.base import SetUp
from modopt.opt.cost import CostParent


class ADMMcostObj(CostParent):
r"""Cost Object for the ADMM problem class.

Parameters
----------
    cost_funcs : 2-tuple of callable
        The functions f and g.
    A : OperatorBase
        First operator.
    B : OperatorBase
        Second operator.
Comment on lines +16 to +19

Contributor:

As I mentioned in the previous PR, I would really like to minimise the use of single-character variable names where possible.

Contributor Author:

In this context the variable names are chosen to match the paper's notation, as is also the case for other algorithms in ModOpt (POGM, for instance, has u, x, y, z). As we are writing scientific code, I think it is best to stick to the scientific notation, which eases the matching between the code and the algorithm descriptions in the paper. Also, I cannot find any better names to use instead.
    b : numpy.ndarray
        Observed data.
    tau : float
        Coupling parameter.
    **kwargs : dict
        Extra parameters for cost operator configuration.

Notes
-----
    Compute :math:`f(u) + g(v) + \tau \|Au + Bv - b\|^2`

See Also
--------
CostParent: parent class
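
    Examples
    --------
    A minimal sketch with identity operators; the lambda cost functions are
    illustrative placeholders standing in for f and g:

    >>> import numpy as np
    >>> from modopt.opt.linear import Identity
    >>> cost_obj = ADMMcostObj(
    ...     (lambda u: np.sum(u ** 2), lambda v: np.sum(v ** 2)),
    ...     A=Identity(),
    ...     B=Identity(),
    ...     b=np.zeros(3),
    ...     tau=1.0,
    ... )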
"""

def __init__(self, cost_funcs, A, B, b, tau, **kwargs):
        super().__init__(**kwargs)
self.cost_funcs = cost_funcs
self.A = A
self.B = B
self.b = b
self.tau = tau

def _calc_cost(self, u, v, **kwargs):
"""Calculate the cost.

This method calculates the cost from each of the input operators.

Parameters
----------
        u : numpy.ndarray
            First primal variable of ADMM
        v : numpy.ndarray
            Second primal variable of ADMM

Returns
-------
float
Cost value

"""
        xp = get_array_module(u)
        cost = self.cost_funcs[0](u)
        cost += self.cost_funcs[1](v)
        # Coupling term: tau * ||Au + Bv - b||^2
        cost += self.tau * xp.linalg.norm(
            self.A.op(u) + self.B.op(v) - self.b,
        ) ** 2
        return cost


class ADMM(SetUp):
r"""Fast ADMM Optimisation Algorihm.

This class implement the ADMM algorithm described in :cite:`Goldstein2014` (Algorithm 1).

Parameters
----------
    u : numpy.ndarray
        Initial value for the first primal variable of ADMM.
    v : numpy.ndarray
        Initial value for the second primal variable of ADMM.
    mu : numpy.ndarray
        Initial value for the Lagrange multiplier.
    A : modopt.opt.linear.LinearOperator
        Linear operator for u.
    B : modopt.opt.linear.LinearOperator
        Linear operator for v.
    b : numpy.ndarray
        Constraint vector.
    optimizers : tuple
        2-tuple of callables that are the optimizers for u and v.
        Each callable should accept ``init`` and ``obs`` arguments and
        return an estimate for:

        .. math:: u_{k+1} = \arg\min_u H(u) + \frac{\tau}{2}\|Au - y\|^2
        .. math:: v_{k+1} = \arg\min_v G(v) + \frac{\tau}{2}\|Bv - y\|^2

    cost_funcs : tuple
        2-tuple of callables that compute the values of H and G.
    tau : float, default=1
        Coupling parameter for ADMM.

    Notes
    -----
    The algorithm solves the problem:

    .. math:: u, v = \arg\min H(u) + G(v) + \frac{\tau}{2} \|Au + Bv - b\|_2^2

    with the following augmented Lagrangian:

    .. math:: \mathcal{L}_{\tau}(u, v, \lambda) = H(u) + G(v)
        + \langle \lambda, Au + Bv - b \rangle + \frac{\tau}{2} \|Au + Bv - b\|^2

    To allow easy iterative solving, the change of variable
    :math:`\mu = \lambda / \tau` is used. Hence, the Lagrangian of interest is:

    .. math:: \tilde{\mathcal{L}}_{\tau}(u, v, \mu) = H(u) + G(v)
        + \frac{\tau}{2} \left(\|\mu + Au + Bv - b\|^2 - \|\mu\|^2\right)

See Also
--------
SetUp: parent class
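
    Examples
    --------
    A minimal sketch, assuming identity operators and :math:`H = G = 0`, so
    that each hypothetical sub-problem solver simply returns ``obs`` (the
    least-squares minimiser); these trivial solvers are illustrative
    placeholders, not part of ModOpt:

    >>> import numpy as np
    >>> from modopt.opt.linear import Identity
    >>> def solve_h(init, obs):
    ...     return obs  # argmin of tau/2 * ||u - obs||^2 when H = 0, A = I
    >>> def solve_g(init, obs):
    ...     return obs  # argmin of tau/2 * ||v - obs||^2 when G = 0, B = I
    >>> admm = ADMM(
    ...     u=np.zeros(4),
    ...     v=np.zeros(4),
    ...     mu=np.zeros(4),
    ...     A=Identity(),
    ...     B=Identity(),
    ...     b=np.ones(4),
    ...     optimizers=(solve_h, solve_g),
    ... )
    >>> admm.iterate(max_iter=10)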
"""

def __init__(
self,
u,
v,
mu,
A,
B,
b,
optimizers,
tau=1,
cost_funcs=None,
**kwargs,
):
super().__init__(**kwargs)
self.A = A
self.B = B
self.b = b
self._opti_H = optimizers[0]
self._opti_G = optimizers[1]
self._tau = tau
if cost_funcs is not None:
self._cost_func = ADMMcostObj(cost_funcs, A, B, b, tau)
else:
self._cost_func = None

# init iteration variables.
self._u_old = self.xp.copy(u)
self._u_new = self.xp.copy(u)
self._v_old = self.xp.copy(v)
self._v_new = self.xp.copy(v)
self._mu_new = self.xp.copy(mu)
self._mu_old = self.xp.copy(mu)

    def _update(self):
        # u-update: minimise H(u) given the current v and scaled multiplier.
        self._u_new = self._opti_H(
            init=self._u_old,
            obs=self.B.op(self._v_old) + self._mu_old - self.b,
        )
        tmp = self.A.op(self._u_new)
        # v-update: minimise G(v) given the freshly updated u.
        self._v_new = self._opti_G(
            init=self._v_old,
            obs=tmp + self._mu_old - self.b,
        )

        # Ascent step on the scaled Lagrange multiplier.
        self._mu_new = self._mu_old + (tmp + self.B.op(self._v_new) - self.b)

# update cycle
self._u_old = self.xp.copy(self._u_new)
self._v_old = self.xp.copy(self._v_new)
self._mu_old = self.xp.copy(self._mu_new)

# Test cost function for convergence.
if self._cost_func:
self.converge = self.any_convergence_flag()
self.converge |= self._cost_func.get_cost(self._u_new, self._v_new)

def iterate(self, max_iter=150):
"""Iterate.

        This method calls update until either the convergence criterion is
        met or the maximum number of iterations is reached.

Parameters
----------
max_iter : int, optional
Maximum number of iterations (default is ``150``)
"""
self._run_alg(max_iter)

# retrieve metrics results
self.retrieve_outputs()
# rename outputs as attributes
self.u_final = self._u_new
self.x_final = self.u_final # for backward compatibility
self.v_final = self._v_new

def get_notify_observers_kwargs(self):
"""Notify observers.

Return the mapping between the metrics call and the iterated
variables.

Returns
-------
        dict
            The mapping between the metrics and the iterated variables.
"""
return {
'x_new': self._u_new,
'v_new': self._v_new,
'idx': self.idx,
}

def retrieve_outputs(self):
"""Retrieve outputs.

        Declare the outputs of the algorithm as attributes: ``u_final``,
        ``v_final``, and ``metrics`` (``x_final`` is kept as a
        backward-compatible alias of ``u_final``).
"""
metrics = {}
for obs in self._observers['cv_metrics']:
metrics[obs.name] = obs.retrieve_metrics()
self.metrics = metrics


class FastADMM(ADMM):
r"""Fast ADMM Optimisation Algorihm.

This class implement the fast ADMM algorithm
(Algorithm 8 from :cite:`Goldstein2014`)

Parameters
----------
    u : numpy.ndarray
        Initial value for the first primal variable of ADMM.
    v : numpy.ndarray
        Initial value for the second primal variable of ADMM.
    mu : numpy.ndarray
        Initial value for the Lagrange multiplier.
    A : modopt.opt.linear.LinearOperator
        Linear operator for u.
    B : modopt.opt.linear.LinearOperator
        Linear operator for v.
    b : numpy.ndarray
        Constraint vector.
    optimizers : tuple
        2-tuple of callables that are the optimizers for u and v.
        Each callable should accept ``init`` and ``obs`` arguments and
        return an estimate for:

        .. math:: u_{k+1} = \arg\min_u H(u) + \frac{\tau}{2}\|Au - y\|^2
        .. math:: v_{k+1} = \arg\min_v G(v) + \frac{\tau}{2}\|Bv - y\|^2

    cost_funcs : tuple
        2-tuple of callables that compute the values of H and G.
    tau : float, default=1
        Coupling parameter for ADMM.
    eta : float, default=0.999
        Restart threshold for the acceleration.
    alpha : float, default=1.
        Initial value for the FISTA-like acceleration parameter.

    Notes
    -----
    This is an accelerated version of the ADMM algorithm with a FISTA-like
    momentum term and an automatic restart rule: the momentum is kept only
    while the combined residual decreases by at least a factor :math:`\eta`
    per iteration. The convergence hypotheses are stronger than for the
    plain ADMM algorithm.

See Also
--------
ADMM: parent class
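
    Examples
    --------
    A minimal sketch reusing the illustrative placeholder solvers from the
    ``ADMM`` example (identity operators, :math:`H = G = 0`):

    >>> import numpy as np
    >>> from modopt.opt.linear import Identity
    >>> def solve_h(init, obs):
    ...     return obs
    >>> def solve_g(init, obs):
    ...     return obs
    >>> fadmm = FastADMM(
    ...     u=np.zeros(4),
    ...     v=np.zeros(4),
    ...     mu=np.zeros(4),
    ...     A=Identity(),
    ...     B=Identity(),
    ...     b=np.ones(4),
    ...     optimizers=(solve_h, solve_g),
    ...     eta=0.999,
    ... )
    >>> fadmm.iterate(max_iter=10)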
"""

def __init__(
self,
u,
v,
mu,
A,
B,
b,
optimizers,
cost_funcs=None,
alpha=1,
eta=0.999,
tau=1,
**kwargs,
):
super().__init__(
u=u,
            v=v,
mu=mu,
A=A,
B=B,
b=b,
optimizers=optimizers,
cost_funcs=cost_funcs,
**kwargs,
)
self._c_old = np.inf
self._c_new = 0
self._eta = eta
self._alpha_old = alpha
self._alpha_new = alpha
self._v_hat = self.xp.copy(self._v_new)
self._mu_hat = self.xp.copy(self._mu_new)

    def _update(self):
        # Classical ADMM steps, using the accelerated (hatted) variables.
        self._u_new = self._opti_H(
            init=self._u_old,
            obs=self.B.op(self._v_hat) + self._mu_hat - self.b,
        )
        tmp = self.A.op(self._u_new)
        self._v_new = self._opti_G(
            init=self._v_hat,
            obs=tmp + self._mu_hat - self.b,
        )

        self._mu_new = self._mu_hat + (tmp + self.B.op(self._v_new) - self.b)

        # Restart test: keep the acceleration only while the combined
        # primal/dual residual decreases fast enough.
        self._c_new = self.xp.linalg.norm(self._mu_new - self._mu_hat)
        self._c_new += self._tau * self.xp.linalg.norm(
            self.B.op(self._v_new - self._v_hat),
        )
        if self._c_new < self._eta * self._c_old:
            # FISTA-like momentum step (Algorithm 8 of Goldstein2014).
            self._alpha_new = (1 + np.sqrt(1 + 4 * self._alpha_old ** 2)) / 2
            beta = (self._alpha_old - 1) / self._alpha_new
            self._v_hat = self._v_new + (self._v_new - self._v_old) * beta
            self._mu_hat = self._mu_new + (self._mu_new - self._mu_old) * beta
        else:
            # Restart: drop the momentum and revert to the last iterates.
            self._alpha_new = 1
            self._v_hat = self._v_old
            self._mu_hat = self._mu_old
            self._c_new = self._c_old / self._eta

        # Update cycle, including the acceleration state.
        self.xp.copyto(self._u_old, self._u_new)
        self.xp.copyto(self._v_old, self._v_new)
        self.xp.copyto(self._mu_old, self._mu_new)
        self._alpha_old = self._alpha_new
        self._c_old = self._c_new
# Test cost function for convergence.
if self._cost_func:
self.converge = self.any_convergence_flag()
            self.converge |= self._cost_func.get_cost(self._u_new, self._v_new)