133 changes: 97 additions & 36 deletions GPflowOpt/acquisition/acquisition.py
@@ -24,24 +24,55 @@
import tensorflow as tf

import copy
from functools import wraps

float_type = settings.dtypes.float_type


def setup_required(method):
"""
Decorator to mark methods of Acquisition classes that require setup to be run first, as indicated by the _needs_setup flag.
:param method: acquisition method
"""
@wraps(method)
def runnable(instance, *args, **kwargs):
assert isinstance(instance, Acquisition)
hp = instance.highest_parent
if hp._needs_setup:
# Avoid infinite loops caused by setup() invoking evaluate() on another acquisition,
# e.g. through feasible_data_index.
hp._needs_setup = False

# 1 - optimize
hp._optimize_models()

# 2 - setup
hp._setup()
results = method(instance, *args, **kwargs)
return results

return runnable
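For intuition, here is a minimal self-contained sketch of the same lazy-setup idiom, detached from GPflowOpt (LazyScorer and setup_required_sketch are hypothetical names; the real decorator additionally walks to highest_parent and optimizes the models):

from functools import wraps

def setup_required_sketch(method):
    @wraps(method)
    def runnable(instance, *args, **kwargs):
        if instance._needs_setup:
            instance._needs_setup = False   # clear first so _setup cannot recurse
            instance._setup()
        return method(instance, *args, **kwargs)
    return runnable

class LazyScorer(object):
    # Hypothetical stand-in for an Acquisition: caches a baseline lazily.
    def __init__(self, data):
        self.data = list(data)
        self._needs_setup = True

    def set_data(self, data):
        self.data = list(data)
        self._needs_setup = True   # invalidate pre-computed quantities

    def _setup(self):
        self._baseline = min(self.data)   # stand-in for an expensive pre-computation

    @setup_required_sketch
    def evaluate(self, x):
        return x - self._baseline

scorer = LazyScorer([3.0, 1.0, 2.0])
print(scorer.evaluate(5.0))   # triggers _setup once, prints 4.0
print(scorer.evaluate(6.0))   # reuses the cached baseline, prints 5.0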


class Acquisition(Parameterized):
"""
An acquisition function maps the belief represented by a Bayesian model into a
score indicating how promising a point is for evaluation.

In Bayesian Optimization this function is typically optimized over the optimization domain
to determine the next point for evaluation.

An object of this class holds a list of GPflow models. Subclasses implement a build_acquisition function
which computes the acquisition function (usually from the predictive distribution) using TensorFlow.
Each model is automatically optimized when an acquisition object is constructed or when set_data is called.

Acquisition functions can be combined through addition or multiplication to construct joint criteria.
For instance, for constrained optimization.
to determine the next point for evaluation. An object of this class holds a list of GPflow models. Subclasses
implement a build_acquisition function which computes the acquisition function (usually from the predictive
distribution) using TensorFlow. Optionally, a method setup can be implemented which computes some quantities which
are used to compute the acquisition, but do not depend on candidate points.

Acquisition functions can be combined through addition or multiplication to construct joint criteria, for instance
for constrained optimization. The objects then form a tree hierarchy.

Acquisition objects implement a lazy strategy to optimize models and run setup. This is implemented by a _needs_setup
attribute (similar to _needs_recompile in GPflow). Calling :meth:`set_data` sets this flag to True. Calling a method
marked with the setup_required decorator (such as evaluate) first optimizes all models, then calls setup if the flag is set.
In hierarchies, the acquisition objects handling constraints are set up first, then the objects handling
objectives.
"""

def __init__(self, models=[], optimize_restarts=5):
@@ -56,14 +87,14 @@ def __init__(self, models=[], optimize_restarts=5):

assert (optimize_restarts >= 0)
self.optimize_restarts = optimize_restarts
self._optimize_models()
self._needs_setup = True
Review comment (Contributor): shouldn't this be highest_parent._needs_setup?

Reply (Member, author): this happens in the constructor; at this point highest_parent == self because the acquisition is not in a hierarchy yet.


def _optimize_models(self):
"""
Optimizes the hyperparameters of all models that the acquisition function is based on.

It is called automatically during initialization and each time set_data() is called.
When using the high-level :class:`..BayesianOptimizer` class calling set_data() is taken care of.
It is called automatically during initialization and each time :meth:`set_data` is called.
When using the high-level :class:`..BayesianOptimizer` class calling :meth:`set_data` is taken care of.

For each model the hyperparameters of the model at the time it was passed to __init__() are used as initial
point and optimized. If optimize_restarts is set to >1, additional randomization
@@ -97,6 +128,9 @@ def enable_scaling(self, domain):
"""
Enables and configures the :class:`.DataScaler` objects wrapping the GP models.

Sets the _needs_setup attribute to True so the contained models are optimized and :meth:`_setup` is run again
right before evaluating the :class:`Acquisition` function.

:param domain: :class:`.Domain` object, the input transform of the data scalers is configured as a transform
from domain to the unit cube with the same dimensionality.
"""
@@ -105,12 +139,14 @@
for m in self.models:
m.input_transform = domain >> UnitCube(n_inputs)
m.normalize_output = True
self._optimize_models()
self.highest_parent._needs_setup = True
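A brief usage sketch (hedged: assumes a GPflowOpt domain built from ContinuousParameter objects; the acquisition instance is illustrative):

from GPflowOpt.domain import ContinuousParameter

domain = ContinuousParameter('x1', 0., 10.) + ContinuousParameter('x2', -5., 5.)
acquisition.enable_scaling(domain)   # inputs mapped to the unit square, outputs normalized
# the acquisition is flagged for re-setup; models are re-optimized lazily at the next evaluation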

def set_data(self, X, Y):
"""
Update the training data of the contained models. Automatically triggers a hyperparameter optimization
step by calling _optimize_all() and an update of pre-computed quantities by calling setup().
Update the training data of the contained models.

Sets the _needs_setup attribute to True so the contained models are optimized and :meth:`_setup` is run again
right before evaluating the :class:`Acquisition` function.

Let Q be the sum of the output dimensions of all contained models; Y should have at least Q columns.
Only the first Q columns of Y are used, and the scalar Q is returned.
@@ -128,11 +164,7 @@ def set_data(self, X, Y):
model.X = X
model.Y = Ypart

self._optimize_models()

# Only call setup for the high-level acquisition function
if self.highest_parent == self:
self.setup()
self.highest_parent._needs_setup = True
return num_outputs_sum
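To make the column bookkeeping concrete, here is a small numeric sketch (hypothetical output dimensions, not GPflowOpt code): with contained models of output dimension 1 and 2, Q = 3 and only the first three columns of Y are consumed.

import numpy as np

output_dims = [1, 2]   # hypothetical per-model output columns, so Q = 3
Y = np.arange(20.).reshape(5, 4)   # 4 columns supplied; the last one is ignored

offset = 0
for dim in output_dims:
    Ypart = Y[:, offset:offset + dim]   # the columns claimed by this model
    offset += dim
assert offset == 3   # the scalar Q that set_data returns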

@property
@@ -187,14 +219,31 @@ def feasible_data_index(self):
"""
return np.ones(self.data[0].shape[0], dtype=bool)

def setup(self):
def _setup(self):
"""
Pre-calculation of quantities used later in the evaluation of the acquisition function for candidate points.

Automatically triggered by :meth:`~.Acquisition.set_data`.
Subclasses can implement this method to compute quantities (such as fmin) that are reused across evaluations. When to
run this function is decided by :class:`Acquisition`, based on the setup_required decorator on methods which require
setup to have been run (e.g. :meth:`evaluate`).
"""
pass

def _setup_constraints(self):
"""
Run only if some outputs handled by this acquisition are constraints. Used in aggregation.
"""
if self.constraint_indices().size > 0:
self._setup()

def _setup_objectives(self):
"""
Run only if all outputs handled by this acquisition are objectives. Used in aggregation.
"""
if self.constraint_indices().size == 0:
self._setup()

@setup_required
@AutoFlow((float_type, [None, None]))
def evaluate_with_gradients(self, Xcand):
"""
@@ -206,6 +255,7 @@ def evaluate_with_gradients(self, Xcand):
acq = self.build_acquisition(Xcand)
return acq, tf.gradients(acq, [Xcand], name="acquisition_gradient")[0]

@setup_required
@AutoFlow((float_type, [None, None]))
def evaluate(self, Xcand):
"""
@@ -241,6 +291,11 @@ def __mul__(self, other):
return AcquisitionProduct([self] + other.operands.sorted_params)
return AcquisitionProduct([self, other])
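In user code the combination is a one-liner; a hedged usage sketch assuming the ExpectedImprovement and ProbabilityOfFeasibility criteria shipped with GPflowOpt (the model variables are illustrative):

from GPflowOpt.acquisition import ExpectedImprovement, ProbabilityOfFeasibility

ei = ExpectedImprovement(objective_model)   # handles the objective output
pof = ProbabilityOfFeasibility(constraint_model)   # handles the constraint output
joint = ei * pof   # AcquisitionProduct: EI weighted by the probability of feasibility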

def __setattr__(self, key, value):
super(Acquisition, self).__setattr__(key, value)
if key == '_parent':
self.highest_parent._needs_setup = True


class AcquisitionAggregation(Acquisition):
"""
@@ -256,10 +311,10 @@ def __init__(self, operands, oper):
assert (all([isinstance(x, Acquisition) for x in operands]))
self.operands = ParamList(operands)
self._oper = oper
self.setup()

def _optimize_models(self):
pass
for oper in self.operands:
oper._optimize_models()

@Acquisition.models.getter
def models(self):
@@ -273,13 +328,22 @@ def set_data(self, X, Y):
offset = 0
for operand in self.operands:
offset += operand.set_data(X, Y[:, offset:])
if self.highest_parent == self:
self.setup()
return offset

def setup(self):
def _setup_constraints(self):
for oper in self.operands:
oper.setup()
if oper.constraint_indices().size > 0: # Small optimization, skip subtrees with objectives only
oper._setup_constraints()

def _setup_objectives(self):
for oper in self.operands:
oper._setup_objectives()

def _setup(self):
# Important: First setup acquisitions involving constraints
self._setup_constraints()
# Then objectives as these might depend on the constraint acquisition
self._setup_objectives()
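The constraints-first ordering matters because objective criteria may consume quantities that constraint criteria establish. A hedged sketch of the dependency in a two-operand tree (ei and pof are illustrative operands):

joint = ei * pof   # AcquisitionProduct over one objective and one constraint criterion
# joint._setup() proceeds in two phases:
#   1. _setup_constraints(): pof._setup() runs, so feasible_data_index() reflects the data
#   2. _setup_objectives():  ei._setup() then computes fmin over feasible points only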

def constraint_indices(self):
offset = [0]
@@ -304,7 +368,6 @@ class AcquisitionSum(AcquisitionAggregation):
"""
Sum of acquisition functions
"""

def __init__(self, operands):
super(AcquisitionSum, self).__init__(operands, tf.reduce_sum)

@@ -319,7 +382,6 @@ class AcquisitionProduct(AcquisitionAggregation):
"""
Product of acquisition functions
"""

def __init__(self, operands):
super(AcquisitionProduct, self).__init__(operands, tf.reduce_prod)

@@ -350,14 +412,16 @@ def __init__(self, acquisition, n_slices, **kwargs):
# the call to the constructor of the parent class will optimize the acquisition, so it obtains the MLE solution.
super(MCMCAcquistion, self).__init__([acquisition] + copies)
self._sample_opt = kwargs
# Update the hyperparameters of the copies using HMC
self._update_hyper_draws()

def _update_hyper_draws(self):
def _optimize_models(self):
# Optimize model #1
self.operands[0]._optimize_models()

# Draw samples using HMC
# Sample each model of the acquisition function - results in a list of 2D ndarrays.
hypers = np.hstack([model.sample(len(self.operands), **self._sample_opt) for model in self.models])

# Now visit all copies, and set state
# Now visit all acquisition copies, and set state
for idx, draw in enumerate(self.operands):
draw.set_state(hypers[idx, :])

@@ -371,9 +435,6 @@ def set_data(self, X, Y):
# This triggers model.optimize() on self.operands[0]
# All copies have optimization disabled, but must have updated data.
offset = operand.set_data(X, Y)
self._update_hyper_draws()
if self.highest_parent == self:
self.setup()
return offset

def build_acquisition(self, Xcand):
6 changes: 3 additions & 3 deletions GPflowOpt/acquisition/ei.py
@@ -58,10 +58,10 @@ def __init__(self, model):
"""
super(ExpectedImprovement, self).__init__(model)
self.fmin = DataHolder(np.zeros(1))
self.setup()
self._setup()

def setup(self):
super(ExpectedImprovement, self).setup()
def _setup(self):
super(ExpectedImprovement, self)._setup()
# Obtain the lowest posterior mean for the previous - feasible - evaluations
feasible_samples = self.data[0][self.highest_parent.feasible_data_index(), :]
samples_mean, _ = self.models[0].predict_f(feasible_samples)
16 changes: 10 additions & 6 deletions GPflowOpt/acquisition/hvpoi.py
@@ -69,23 +69,27 @@ def __init__(self, models):
:param models: A list of (possibly multioutput) GPflow models representing our belief about the objectives.
"""
super(HVProbabilityOfImprovement, self).__init__(models)
assert self.data[1].shape[1] > 1
self.pareto = Pareto(np.hstack((m.predict_f(self.data[0])[0] for m in self.models)))
self.reference = DataHolder(self._estimate_reference())
num_objectives = self.data[1].shape[1]
assert num_objectives > 1

# Keep empty for now - it is updated in _setup()
self.pareto = Pareto(np.empty((0, num_objectives)))
self.reference = DataHolder(np.ones((1, num_objectives)))

def _estimate_reference(self):
pf = self.pareto.front.value
f = np.max(pf, axis=0, keepdims=True) - np.min(pf, axis=0, keepdims=True)
return np.max(pf, axis=0, keepdims=True) + 2 * f / pf.shape[0]
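A worked numeric example of this heuristic (illustrative Pareto front): the reference point lies beyond the maximum of the front by twice the per-objective range divided by the number of front points.

import numpy as np

pf = np.array([[0., 1.],
               [1., 0.]])   # hypothetical 2-point Pareto front
f = np.max(pf, axis=0, keepdims=True) - np.min(pf, axis=0, keepdims=True)   # ranges: [[1., 1.]]
ref = np.max(pf, axis=0, keepdims=True) + 2 * f / pf.shape[0]   # reference point: [[2., 2.]]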

def setup(self):
def _setup(self):
"""
Pre-computes the Pareto set and cell bounds for integrating over the non-dominated region.
"""
super(HVProbabilityOfImprovement, self).setup()
super(HVProbabilityOfImprovement, self)._setup()

# Obtain hypervolume cell bounds, use prediction mean
F = np.hstack((m.predict_f(self.data[0])[0] for m in self.models))
feasible_samples = self.data[0][self.highest_parent.feasible_data_index(), :]
F = np.hstack((m.predict_f(feasible_samples)[0] for m in self.models))
self.pareto.update(F)

# Calculate reference point.
9 changes: 5 additions & 4 deletions GPflowOpt/acquisition/poi.py
@@ -37,11 +37,12 @@ def __init__(self, model):
"""
super(ProbabilityOfImprovement, self).__init__(model)
self.fmin = DataHolder(np.zeros(1))
self.setup()
self._setup()

def setup(self):
super(ProbabilityOfImprovement, self).setup()
samples_mean, _ = self.models[0].predict_f(self.data[0])
def _setup(self):
super(ProbabilityOfImprovement, self)._setup()
feasible_samples = self.data[0][self.highest_parent.feasible_data_index(), :]
samples_mean, _ = self.models[0].predict_f(feasible_samples)
self.fmin.set_data(np.min(samples_mean, axis=0))

def build_acquisition(self, Xcand):
14 changes: 12 additions & 2 deletions GPflowOpt/bo.py
@@ -23,6 +23,7 @@
from .design import Design, EmptyDesign
from .pareto import non_dominated_sort


class BayesianOptimizer(Optimizer):
"""
A traditional Bayesian optimization framework implementation.
@@ -57,15 +58,24 @@ def __init__(self, domain, acquisition, optimizer=None, initial=None, scaling=Tr
assert initial is None or isinstance(initial, Design)
super(BayesianOptimizer, self).__init__(domain, exclude_gradient=True)

if scaling:
self._scaling = scaling
if self._scaling:
acquisition.enable_scaling(domain)

self.acquisition = acquisition if hyper_draws is None else MCMCAcquistion(acquisition, hyper_draws)

self.optimizer = optimizer or SciPyOptimizer(domain)
self.optimizer.domain = domain
initial = initial or EmptyDesign(domain)
self.set_initial(initial.generate())

@Optimizer.domain.setter
def domain(self, dom):
assert self.domain.size == dom.size
super(BayesianOptimizer, self.__class__).domain.fset(self, dom)
if self._scaling:
self.acquisition.enable_scaling(dom)
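A hedged usage sketch of the new domain setter (all names illustrative; the assertion requires both domains to have the same size):

optimizer = BayesianOptimizer(domain, acquisition)   # scaling enabled by default
optimizer.domain = refined_domain   # must match the original dimensionality
# with scaling enabled, enable_scaling(dom) reconfigures the DataScaler transforms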

def _update_model_data(self, newX, newY):
"""
Update the underlying models of the acquisition function with new data.
@@ -165,7 +175,7 @@ def _optimize(self, fx, n_iter):
:return: OptimizeResult object
"""

assert(isinstance(fx, ObjectiveWrapper))
assert isinstance(fx, ObjectiveWrapper)

# Evaluate and add the initial design (if any)
initial = self.get_initial()