NNPDF · APJansen · Feb 21, 2024 · Feb 25, 2024
diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py
@@ -5,6 +5,7 @@
     NN_PREFIX,
     PREPROCESSING_LAYER_ALL_REPLICAS,
     MetaModel,
+    extract_replica_weights,
 )
 from n3fit.backends.keras_backend.base_layers import (
     Concatenate,
@@ -18,5 +19,6 @@
     set_eager,
     set_initial_state,
 )
+from n3fit.backends.keras_backend.metrics import LossMetric
 
 print("Using Keras backend")
diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -120,7 +120,6 @@ def __init__(self, input_tensors, output_tensors, scaler=None, input_values=None
         self.single_replica_generator = None
 
         self.target_tensors = None
-        self.compute_losses_function = None
         self._scaler = scaler
 
     @tf.autograph.experimental.do_not_convert
@@ -170,6 +169,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs):
         x_params = self._parse_input(x)
         if y is None:
             y = self.target_tensors
+        y = {name: np.zeros((1, 1)) for name in self.loss.keys()}
         history = super().fit(x=x_params, y=y, epochs=epochs, **kwargs)
         loss_dict = history.history
         return loss_dict
@@ -180,44 +180,6 @@ def predict(self, x=None, **kwargs):
         result = super().predict(x=x, **kwargs)
         return result
 
-    def compute_losses(self):
-        """
-        This function is equivalent to the model ``evaluate(x,y)`` method of most TensorFlow models
-        which return a dictionary of losses per output layer.
-        The losses reported in the ``evaluate`` method for n3fit are, however, summed over replicas.
-        Instead the loss we are interested in is usually the output of the model (i.e., predict)
-        This function then generates a dict of partial losses of the model separated per replica.
-        i.e., the output for experiment {'LHC_exp'} will be an array of Nrep elements.
-
-        Returns
-        -------
-            dict
-                a dictionary with all partial losses of the model
-        """
-        if self.compute_losses_function is None:
-            # If it is the first time we are passing through, compile the function and save it
-            out_names = [f"{i}_loss" for i in self.output_names]
-            out_names.insert(0, "loss")
-
-            # Compile a evaluation function
-            @tf.function
-            def losses_fun():
-                predictions = self(self._parse_input(None))
-                # If we only have one dataset the output changes
-                if len(out_names) == 2:
-                    predictions = [predictions]
-                total_loss = tf.reduce_sum(predictions, axis=0)
-                ret = [total_loss] + predictions
-                return dict(zip(out_names, ret))
-
-            self.compute_losses_function = losses_fun
-
-        ret = self.compute_losses_function()
-
-        # The output of this function is to be used by python (and numpy)
-        # so we need to convert the tensors
-        return _to_numpy_or_python_type(ret)
-
     def compile(
         self,
         optimizer_name="RMSprop",
@@ -237,7 +199,7 @@ def compile(
             - A ``target_output`` can be defined. If done in this way
                 (for instance because we know the target data will be the same for the whole fit)
                 the data will be compiled together with the model and won't be necessary to
-                input it again when calling the ``perform_fit`` or ``compute_losses`` methods.
+                input it again when calling the ``perform_fit`` method.
 
         Parameters
         ----------
@@ -283,7 +245,7 @@ def compile(
                 target_output = [target_output]
             self.target_tensors = target_output
 
-        super().compile(optimizer=opt, loss=loss)
+        super().compile(optimizer=opt, loss=loss, **kwargs)
 
     def set_masks_to(self, names, val=0.0):
         """Set all mask value to the selected value
@@ -337,14 +299,39 @@ def get_layer_re(self, regex):
         check = lambda x: re.match(regex, x.name)
         return list(filter(check, self.layers))
 
-    def get_replica_weights(self, i_replica):
+    def get_all_replica_weights(self):
         """
-        Get the weights of replica i_replica.
+        Get the weights of all the replicas.
 
         This assumes that the only weights are in the
         layer types defined as the constants
             NN_LAYER_ALL_REPLICAS & PREPROCESSING_LAYER_ALL_REPLICAS
 
+        Returns
+        -------
+            dict
+                layer type -> weights (one list of weights per replica in the stacked case)
+        """
+        weights = {}
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            if is_stacked_single_replicas(layer):
+                # In this (mostly deprecated, only for dense_per_flavour) case, do it one by one
+                weights[layer_type] = []
+                for i_replica in range(self.num_replicas):
+                    weights_ref = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
+                    replica_weights = [tf.Variable(w, name=w.name) for w in weights_ref]
+                    weights[layer_type].append(replica_weights)
+            else:
+                weights[layer_type] = layer.weights
+
+        return weights
+
+    def get_replica_weights(self, i_replica):
+        """
+        Get the weights of replica i_replica.
+
+
         Parameters
         ----------
             i_replica: int
@@ -354,12 +341,8 @@ def get_replica_weights(self, i_replica):
             dict
                 dictionary with the weights of the replica
         """
-        weights = {}
-        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
-            layer = self.get_layer(layer_type)
-            weights[layer_type] = get_layer_replica_weights(layer, i_replica)
-
-        return weights
+        all_weights = self.get_all_replica_weights()
+        return extract_replica_weights(all_weights, i_replica)
 
     def set_replica_weights(self, weights, i_replica=0):
         """
@@ -435,35 +418,24 @@ def is_stacked_single_replicas(layer):
     return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers]
 
 
-def get_layer_replica_weights(layer, i_replica: int):
+def extract_replica_weights(all_weights, i_replica):
     """
-    Get the weights for the given single replica ``i_replica``,
-    from a ``layer`` that contains the weights of all the replicas.
-
-    Note that the layer could be a complete NN with many separated sub_layers
-    each of which containing weights for all replicas together.
-    This functions separates the per-replica weights and returns the list of weight as if the
-    input ``layer`` were made of _only_ replica ``i_replica``.
+    Slice the weights of replica ``i_replica`` (an index) out of the weights of all replicas.
 
     Parameters
     ----------
-        layer: MetaLayer
-            the layer to get the weights from
-        i_replica: int
-            the replica number
+        all_weights: dict
+            layer type -> weights; each weight is sliced as ``[i_replica : i_replica + 1]``
 
     Returns
     -------
-        weights: list
-            list of weights for the replica
+        dict
+            same keys, each mapped to a list of new ``tf.Variable`` keeping the weight names
     """
-    if is_stacked_single_replicas(layer):
-        weights_ref = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
-        weights = [tf.Variable(w, name=w.name) for w in weights_ref]
-    else:
-        weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights]
-
-    return weights
+    return {
+        layer_type: [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer_weights]
+        for layer_type, layer_weights in all_weights.items()
+    }
 
 
 def set_layer_replica_weights(layer, weights, i_replica: int):

diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -10,9 +10,10 @@
 
 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard
 
 log = logging.getLogger(__name__)
 
@@ -30,7 +31,7 @@ def __init__(self, count_range=100):
         self.last_time = 0
 
     def on_epoch_end(self, epoch, logs=None):
-        """ At the end of every epoch it checks the time """
+        """At the end of every epoch it checks the time"""
         new_time = time()
         if epoch == 0:
             # The first epoch is only useful for starting
@@ -45,13 +46,13 @@ def on_epoch_end(self, epoch, logs=None):
         self.last_time = new_time
 
     def on_train_end(self, logs=None):
-        """ Print the results """
+        """Print the results"""
         total_time = time() - self.starting_time
         n_times = len(self.all_times)
         # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code
         # by epoch 100 all parts of the code have usually been called so it's a good compromise
-        mean = np.mean(self.all_times[min(110, n_times-1):])
-        std = np.std(self.all_times[min(110, n_times-1):])
+        mean = np.mean(self.all_times[min(110, n_times - 1) :])
+        std = np.std(self.all_times[min(110, n_times - 1) :])
         log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s")
         log.info(f"> > > Total time: {total_time/60:.5} min")
 
@@ -77,7 +78,7 @@ def __init__(self, stopping_object, log_freq=100):
         self.stopping_object = stopping_object
 
     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
         print_stats = ((epoch + 1) % self.log_freq) == 0
         # Note that the input logs correspond to the fit before the weights are updated
         self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats)
@@ -103,23 +104,26 @@ class LagrangeCallback(Callback):
             List of the names of the datasets to be trained
         multipliers: list(float)
             List of multipliers to be applied
+        losses: dict
+            Dictionary of losses
         update_freq: int
             each how many epochs the positivity lambda is updated
     """
 
-    def __init__(self, datasets, multipliers, update_freq=100):
+    def __init__(self, datasets, multipliers, losses, update_freq=100):
         super().__init__()
         if len(multipliers) != len(datasets):
             raise ValueError("The number of datasets and multipliers do not match")
         self.update_freq = update_freq
         self.datasets = datasets
         self.multipliers = multipliers
         self.updateable_weights = []
+        self.losses = losses
 
     def on_train_begin(self, logs=None):
-        """ Save an instance of all relevant layers """
+        """Save an instance of all relevant layers"""
         for layer_name in self.datasets:
-            layer = self.model.get_layer(layer_name)
+            layer = self.losses[layer_name]
             self.updateable_weights.append(layer.weights)
 
     @tf.function
@@ -133,7 +137,7 @@ def _update_weights(self):
                 w.assign(w * multiplier)
 
     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
         if (epoch + 1) % self.update_freq == 0:
             self._update_weights()
 

diff --git a/n3fit/src/n3fit/backends/keras_backend/metrics.py b/n3fit/src/n3fit/backends/keras_backend/metrics.py
@@ -0,0 +1,41 @@
+import tensorflow as tf
+from tensorflow.keras.metrics import Metric
+
+import n3fit.backends.keras_backend.operations as op
+
+
+class LossMetric(Metric):
+    """
+    (Validation) loss as a Keras metric: stores the per-replica loss internally
+    and aggregates over the replicas (``agg``) only when ``result`` is requested.
+
+    Parameters
+    ----------
+        loss_layer : tf.keras.layers.Layer
+            Loss layer; called on ``y_pred``, its output's first axis sizes the state.
+        agg : str
+            Aggregation over replicas used by ``result``: 'sum' or 'mean' (else ValueError).
+    """
+
+    def __init__(self, loss_layer, agg='sum', name='val_loss', **kwargs):
+        super().__init__(name=name, **kwargs)
+        self.loss_layer = loss_layer
+        if agg == 'sum':
+            self.agg = op.sum
+        elif agg == 'mean':
+            self.agg = op.mean
+        else:
+            raise ValueError(f'agg must be sum or mean, got {agg}')
+        num_replicas = loss_layer.output.shape[0]
+        self.per_replica_losses = self.add_weight(
+            name="per_replica_losses", shape=(num_replicas,), initializer="zeros"
+        )
+
+    def update_state(self, y_true, y_pred, sample_weight=None):
+        self.per_replica_losses.assign(self.loss_layer(y_pred))
+
+    def result(self):
+        return self.agg(self.per_replica_losses)
+
+    def reset_state(self):
+        self.per_replica_losses.assign(tf.zeros_like(self.per_replica_losses))
diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py
@@ -145,15 +145,16 @@ def fit_future_tests(n3pdfs=None, experimental_models=None, **_kwargs):
             # Update the mask of the last_model so that its synced with this layer
             last_model.get_layer(layer.name).update_mask(layer.mask)
 
-        # Compute the loss with pdf errors
-        pdf_chi2 = exp_model.compute_losses()["loss"][0]
-
-        # And the loss of the best (most complete) fit
-        best_chi2 = last_model.compute_losses()["loss"][0]
-
-        # Now make this into a measure of the total loss
-        # for instance, any deviation from the "best" value is bad
-        total_loss += np.abs(best_chi2 - pdf_chi2)
+    # TODO Aron: replace compute_losses here, is this even ever called?
+    #        # Compute the loss with pdf errors
+    #        pdf_chi2 = exp_model.compute_losses()["loss"][0]
+    #
+    #        # And the loss of the best (most complete) fit
+    #        best_chi2 = last_model.compute_losses()["loss"][0]
+    #
+    #        # Now make this into a measure of the total loss
+    #        # for instance, any deviation from the "best" value is bad
+    #        total_loss += np.abs(best_chi2 - pdf_chi2)
 
     if compatibility_mode:
         set_eager(False)

diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py
@@ -308,11 +308,12 @@ def _write_metadata_json(self, i, out_path):
             json.dump(json_dict, fs, indent=2, cls=SuperEncoder)
 
         log.info(
-            "Best fit for replica #%d, chi2=%.3f (tr=%.3f, vl=%.3f)",
+            "Best fit for replica #%d, chi2=%.3f (tr=%.3f, vl=%.3f), at epoch %d.",
             self.replica_numbers[i],
             self.true_chi2[i],
             self.tr_chi2[i],
             self.vl_chi2[i],
+            self.stopping_object.e_best_chi2[i],
         )
 
     def _export_pdf_grid(self, i, out_path):
@@ -514,7 +515,6 @@ def evln2lha(evln, nf=6):
         - 2 * evln[8]
     ) / 120
 
-
     # if a heavy quark is not active at Q0 (the scale at which the output of the fit is stored),
     # keep the PDF values at 0.0 to prevent small negative values due to numerical instabilities