Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/source/vmcon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ The Quadratic Programming Problem
The Quadratic Programming Problem (QPP) will also be known as the Quadratic Sub-Problem (QSP) because it forms only a part of the
VMCON algorithm--with the other half being the Augmented Lagrangian.

The QPP provides the search direction :math:`\delta_j` which is a vector upon which :math:`\vec{x}_j` will lay.
Solving the QPP also provides the Lagrange multipliers, :math:`\lambda_{j}`.
The function :math:`Q(\delta)` is a quadratic approximation of the Lagrangian function.
Minimising :math:`Q(\delta)` subject to a linearisation of our constraints about :math:`\vec{x}_{j-1}` (aka solving the QPP)
provides the search direction :math:`\delta_j` from :math:`\vec{x}_{j-1}` upon which :math:`\vec{x}_j` will lie.
Solving the QPP also provides the Lagrange multipliers (:math:`\lambda_{j}`).

The quadratic program to be minimised on iteration :math:`j` is:

Expand Down
10 changes: 4 additions & 6 deletions src/pyvmcon/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Exceptions and errors raised within VMCON."""

import numpy as np

from .problem import Result
from .problem import Result, VectorType


class VMCONConvergenceException(Exception):
Expand All @@ -15,10 +13,10 @@ class VMCONConvergenceException(Exception):
def __init__(
self,
*args: object,
x: np.ndarray | None = None,
x: VectorType | None = None,
result: Result | None = None,
lamda_equality: np.ndarray | None = None,
lamda_inequality: np.ndarray | None = None,
lamda_equality: VectorType | None = None,
lamda_inequality: VectorType | None = None,
) -> None:
"""Constructs an exception raised within VMCON.

Expand Down
54 changes: 31 additions & 23 deletions src/pyvmcon/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,32 @@

import numpy as np

T = TypeVar("T", np.ndarray, np.number, float)
ScalarType = TypeVar("ScalarType", np.ndarray, np.number, float)
"""A scalar variable e.g. a single number (which could be a 0D numpy array)"""
VectorType = TypeVar("VectorType", bound=np.ndarray)
"""A numpy array with only 1 dimension"""
MatrixType = TypeVar("MatrixType", bound=np.ndarray)
"""A numpy array with 2 dimensions"""


class Result(NamedTuple):
"""The data from calling a problem."""

f: T
"""Value of the objective function"""
df: T
"""Derivative of the objective function"""
eq: np.ndarray
"""1D array of the values of the equality constraints with shape"""
deq: np.ndarray
f: ScalarType
"""Value of the objective function."""
df: VectorType
"""Derivative of the objective function wrt each input variable."""
eq: VectorType
"""1D array of the values of the equality constraints."""
deq: MatrixType
"""2D array of the derivatives of the equality constraints wrt
each component of `x`
each input variable.
"""
ie: np.ndarray
"""1D array of the values of the inequality constraints"""
die: np.ndarray
ie: VectorType
"""1D array of the values of the inequality constraints."""
die: MatrixType
"""2D array of the derivatives of the inequality constraints wrt
each component of `x`
each input variable.
"""


Expand All @@ -42,7 +47,7 @@ class AbstractProblem(ABC):
"""

@abstractmethod
def __call__(self, x: np.ndarray) -> Result:
def __call__(self, x: VectorType) -> Result:
"""Evaluate the optimisation problem at input x."""

@property
Expand Down Expand Up @@ -71,24 +76,27 @@ def total_constraints(self) -> int:
return self.num_equality + self.num_inequality


_FunctionAlias = Callable[[np.ndarray], T]
_DerivativeFunctionAlias = Callable[[np.ndarray], np.ndarray]
_ScalarReturnFunctionAlias = Callable[[VectorType], ScalarType]
_VectorReturnFunctionAlias = Callable[[VectorType], VectorType]


class Problem(AbstractProblem):
"""A simple implementation of an AbstractProblem.

It essentially acts as a caller to equations to gather all of the various data.

Note that VMCON assumes minimisation of f and that inequality constraints are
feasible when they return a value >= 0.
"""

def __init__(
self,
f: _FunctionAlias,
df: _DerivativeFunctionAlias,
equality_constraints: list[_FunctionAlias],
inequality_constraints: list[_FunctionAlias],
dequality_constraints: list[_DerivativeFunctionAlias],
dinequality_constraints: list[_DerivativeFunctionAlias],
f: _ScalarReturnFunctionAlias,
df: _VectorReturnFunctionAlias,
equality_constraints: list[_ScalarReturnFunctionAlias],
inequality_constraints: list[_ScalarReturnFunctionAlias],
dequality_constraints: list[_VectorReturnFunctionAlias],
dinequality_constraints: list[_VectorReturnFunctionAlias],
) -> None:
"""Construct the problem."""
super().__init__()
Expand All @@ -100,7 +108,7 @@ def __init__(
self._dequality_constraints = dequality_constraints
self._dinequality_constraints = dinequality_constraints

def __call__(self, x: np.ndarray) -> Result:
def __call__(self, x: VectorType) -> Result:
"""Evaluate the problem at input point x."""
return Result(
self._f(x),
Expand Down
78 changes: 41 additions & 37 deletions src/pyvmcon/vmcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,35 @@
VMCONConvergenceException,
_QspSolveException,
)
from .problem import AbstractProblem, Result, T
from .problem import AbstractProblem, MatrixType, Result, ScalarType, VectorType

logger = logging.getLogger(__name__)


def solve(
problem: AbstractProblem,
x: np.ndarray,
lbs: np.ndarray | None = None,
ubs: np.ndarray | None = None,
x: VectorType,
lbs: VectorType | None = None,
ubs: VectorType | None = None,
*,
max_iter: int = 10,
epsilon: float = 1e-8,
qsp_options: dict[str, Any] | None = None,
initial_B: np.ndarray | None = None,
callback: Callable[[int, Result, np.ndarray, float], None] | None = None,
callback: Callable[[int, Result, VectorType, float], None] | None = None,
additional_convergence: Callable[
[Result, np.ndarray, np.ndarray, np.ndarray, np.ndarray], None
[Result, VectorType, VectorType, VectorType, VectorType], None
]
| None = None,
overwrite_convergence_criteria: bool = False,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, Result]:
) -> tuple[VectorType, VectorType, VectorType, Result]:
"""The main solving loop of the VMCON non-linear constrained optimiser.

Parameters
----------
problem : AbstractProblem
Defines the system to be minimised
Defines the system to be minimised, optionally subject to constraints.
Inequality constraints are feasible when >= 0.

x : ndarray
The initial starting `x` of VMCON
Expand Down Expand Up @@ -259,15 +260,18 @@ def solve(
def solve_qsp(
problem: AbstractProblem,
result: Result,
x: np.ndarray,
B: np.ndarray,
lbs: np.ndarray | None,
ubs: np.ndarray | None,
x: VectorType,
B: MatrixType,
lbs: VectorType | None,
ubs: VectorType | None,
options: dict[str, Any],
) -> tuple[np.ndarray, ...]:
) -> tuple[VectorType, VectorType, VectorType]:
"""Solves the quadratic programming problem.

This function solves equations 4 and 5 of the VMCON paper.
This function solves equations 4 and 5 of the VMCON paper. It does this by assuming
the objective function is quadratic and linearising the constraints. The equations
can be found on the
[VMCON page of the docs](https://ukaea.github.io/PyVMCON/vmcon.html#the-quadratic-programming-problem).

The QSP is solved using cvxpy. cvxpy requires the problem be convex, which is
ensured by equation 9 of the VMCON paper.
Expand Down Expand Up @@ -348,9 +352,9 @@ def solve_qsp(

def convergence_value(
result: Result,
delta_j: np.ndarray,
lamda_equality: np.ndarray,
lamda_inequality: np.ndarray,
delta_j: VectorType,
lamda_equality: VectorType,
lamda_inequality: VectorType,
) -> float:
"""Test if the convergence criteria of VMCON have been met.

Expand Down Expand Up @@ -394,13 +398,13 @@ def _calculate_mu_i(mu_im1: np.ndarray | None, lamda: np.ndarray) -> np.ndarray:
def perform_linesearch(
problem: AbstractProblem,
result: Result,
mu_equality: np.ndarray | None,
mu_inequality: np.ndarray | None,
lamda_equality: np.ndarray,
lamda_inequality: np.ndarray,
delta: np.ndarray,
x_jm1: np.ndarray,
) -> tuple[float, np.ndarray, np.ndarray, Result]:
mu_equality: VectorType | None,
mu_inequality: VectorType | None,
lamda_equality: VectorType,
lamda_inequality: VectorType,
delta: VectorType,
x_jm1: VectorType,
) -> tuple[ScalarType, VectorType, VectorType, Result]:
"""Performs the line search on equation 6 (to minimise phi).

Parameters
Expand Down Expand Up @@ -435,7 +439,7 @@ def perform_linesearch(
mu_equality = _calculate_mu_i(mu_equality, lamda_equality)
mu_inequality = _calculate_mu_i(mu_inequality, lamda_inequality)

def phi(result: Result) -> T:
def phi(result: Result) -> ScalarType:
sum_equality = (mu_equality * np.abs(result.eq)).sum()
sum_inequality = (mu_inequality * np.abs(np.minimum(0, result.ie))).sum()

Expand Down Expand Up @@ -478,9 +482,9 @@ def phi(result: Result) -> T:

def _derivative_lagrangian(
result: Result,
lamda_equality: np.ndarray,
lamda_inequality: np.ndarray,
) -> np.ndarray:
lamda_equality: VectorType,
lamda_inequality: VectorType,
) -> VectorType:
ind_eq = min(lamda_equality.shape[0], result.deq.shape[0])
ind_ieq = min(lamda_inequality.shape[0], result.die.shape[0])
c_equality_prime = (lamda_equality[:ind_eq, None] * result.deq[:ind_eq]).sum(
Expand All @@ -493,7 +497,7 @@ def _derivative_lagrangian(
return result.df - c_equality_prime - c_inequality_prime


def _powells_gamma(gamma: np.ndarray, ksi: np.ndarray, B: np.ndarray) -> np.ndarray:
def _powells_gamma(gamma: VectorType, ksi: VectorType, B: MatrixType) -> np.ndarray:
ksiTBksi = ksi.T @ B @ ksi # used throughout eqn 10
ksiTgamma = ksi.T @ gamma # ditto, to reduce amount of matmul

Expand All @@ -504,7 +508,7 @@ def _powells_gamma(gamma: np.ndarray, ksi: np.ndarray, B: np.ndarray) -> np.ndar
return theta * gamma + (1 - theta) * (B @ ksi) # eqn 9


def _revise_B(current_B: np.ndarray, ksi: np.ndarray, gamma: np.ndarray) -> np.ndarray:
def _revise_B(current_B: MatrixType, ksi: VectorType, gamma: VectorType) -> MatrixType:
"""Revises B using a BFGS update.

Implements Equation 8 of the Crane report.
Expand All @@ -519,12 +523,12 @@ def _revise_B(current_B: np.ndarray, ksi: np.ndarray, gamma: np.ndarray) -> np.n
def calculate_new_B(
result: Result,
new_result: Result,
B: np.ndarray,
x_jm1: np.ndarray,
x_j: np.ndarray,
lamda_equality: np.ndarray,
lamda_inequality: np.ndarray,
) -> np.ndarray:
B: MatrixType,
x_jm1: VectorType,
x_j: VectorType,
lamda_equality: VectorType,
lamda_inequality: VectorType,
) -> MatrixType:
"""Updates the hessian approximation matrix."""
# xi (the symbol name) would be a bit confusing in this context,
# ksi is how it's pronounced in modern Greek
Expand Down Expand Up @@ -554,6 +558,6 @@ def calculate_new_B(
return _revise_B(B, ksi, gamma)


def _find_out_of_bounds_vars(higher: np.ndarray, lower: np.ndarray) -> list[str]:
def _find_out_of_bounds_vars(higher: VectorType, lower: VectorType) -> list[str]:
"""Return the indices of the out of bounds variables."""
return np.nonzero((higher - lower) < 0)[0].astype(str).tolist()