From 43aeb42e90b81a3bfd90b574fdcbfe7867fa0df7 Mon Sep 17 00:00:00 2001
From: Aron <aronpjansen@gmail.com>
Date: Mon, 4 Mar 2024 09:13:15 +0100
Subject: [PATCH 1/4] Rename kernel to multi_kernel

---
 n3fit/src/n3fit/backends/keras_backend/multi_dense.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
index b8caa9a3fd..d0c0bdaf4a 100644
--- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -56,7 +56,7 @@ def __init__(
 
     def build(self, input_shape):
         input_dim = input_shape[-1]
-        self.kernel = self.add_weight(
+        self.multi_kernel = self.add_weight(
             name="kernel",
             shape=(self.replicas, input_dim, self.units),
             initializer=self.kernel_initializer,
@@ -80,7 +80,7 @@ def build(self, input_shape):
         # TODO: benchmark against the replica-agnostic einsum below and make that default
         # see https://github.com/NNPDF/nnpdf/pull/1905#discussion_r1489344081
         if self.replicas == 1:
-            matmul = lambda inputs: tf.tensordot(inputs, self.kernel[0], [[-1], [0]])
+            matmul = lambda inputs: tf.tensordot(inputs, self.multi_kernel[0], [[-1], [0]])
             if self.is_first_layer:
                 # Manually add replica dimension
                 self.matmul = lambda x: tf.expand_dims(matmul(x), axis=1)
@@ -88,7 +88,7 @@ def build(self, input_shape):
                 self.matmul = matmul
         else:
             einrule = "bnf,rfg->brng" if self.is_first_layer else "brnf,rfg->brng"
-            self.matmul = lambda inputs: tf.einsum(einrule, inputs, self.kernel)
+            self.matmul = lambda inputs: tf.einsum(einrule, inputs, self.multi_kernel)
 
     def call(self, inputs):
         """

From 2c9ce24d88999adb6f14197baf4243a0485ccacf Mon Sep 17 00:00:00 2001
From: Aron <aronpjansen@gmail.com>
Date: Mon, 4 Mar 2024 11:21:24 +0100
Subject: [PATCH 2/4] list->tuple in output shape

---
 n3fit/src/n3fit/backends/keras_backend/multi_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
index d0c0bdaf4a..9fa0ac5835 100644
--- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -125,7 +125,7 @@ def compute_output_shape(self, input_shape):
         output_shape = super().compute_output_shape(input_shape)
 
         # Add back the replica axis to the output shape.
-        output_shape = output_shape[:1] + [self.replicas] + output_shape[1:]
+        output_shape = output_shape[:1] + (self.replicas,) + output_shape[1:]
 
         return output_shape
 

From baefa5fb6766f8431ab29e6c8fd0200ac5facb54 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Mon, 4 Mar 2024 12:03:58 +0100
Subject: [PATCH 3/4] miscellaneous fixes for tensorflow 2.16

kicking down the can

recover previous behaviour

try-except for 3.11

deal with type mismatch

make sure units are int

remove pdb

fix change in how weights are named

Update n3fit/src/n3fit/tests/test_hyperopt.py

0 is understood as None by initializer

change scope of hyperopt test

bugfix

312
---
 .github/workflows/python_installation.yml     |  2 +-
 .../n3fit/backends/keras_backend/MetaLayer.py | 15 +++------
 .../n3fit/backends/keras_backend/MetaModel.py | 19 +++++++-----
 .../n3fit/backends/keras_backend/callbacks.py | 23 +++++++++-----
 .../backends/keras_backend/constraints.py     | 10 ++----
 .../backends/keras_backend/multi_dense.py     |  4 +--
 .../backends/keras_backend/operations.py      | 14 ++++++---
 .../n3fit/hyper_optimization/hyper_scan.py    | 15 ++++++---
 n3fit/src/n3fit/layers/observable.py          |  4 +--
 n3fit/src/n3fit/model_gen.py                  | 20 ++++++------
 n3fit/src/n3fit/tests/test_hyperopt.py        | 21 +++++++++----
 n3fit/src/n3fit/tests/test_multidense.py      | 31 +++++++++++--------
 12 files changed, 102 insertions(+), 76 deletions(-)

diff --git a/.github/workflows/python_installation.yml b/.github/workflows/python_installation.yml
index 1496c5f743..9116de7943 100644
--- a/.github/workflows/python_installation.yml
+++ b/.github/workflows/python_installation.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.11"]
+        python-version: ["3.11", "3.12"]
         include:
           - os: ubuntu-latest
             CONDA_OS: linux-64
diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
index af4210d603..31def842c4 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
@@ -9,12 +9,7 @@
 """
 
 from tensorflow.keras.layers import Layer
-from tensorflow.keras.initializers import (
-    Constant,
-    RandomUniform,
-    glorot_normal,
-    glorot_uniform,
-)
+from tensorflow.keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform
 
 # Define in this dictionary new initializers as well as the arguments they accept (with default values if needed be)
 initializers = {
@@ -37,9 +32,7 @@ class MetaLayer(Layer):
     weight_inits = []
 
     # Building function
-    def builder_helper(
-        self, name, kernel_shape, initializer, trainable=True, constraint=None
-    ):
+    def builder_helper(self, name, kernel_shape, initializer, trainable=True, constraint=None):
         """
         Creates a kernel that should be saved as an attribute of the caller class
         name: name of the kernel
@@ -73,9 +66,9 @@ def get_weight_by_name(self, weight_name, internal_count=0):
             weight_name: str
                 Name of the weight
         """
-        check_name = f"{self.name}/{weight_name}:{internal_count}"
+        main_name = f"{self.name}/{weight_name}"
         for weight in self.weights:
-            if weight.name == check_name:
+            if weight.name in (f"{main_name}:{internal_count}", main_name, weight_name):
                 return weight
         return None
 
diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
index 2533d8456f..2956a25d2e 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -6,8 +6,8 @@
 """
 
 import re
+import shutil
 
-import h5py
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import optimizers as Kopt
@@ -16,12 +16,6 @@
 
 import n3fit.backends.keras_backend.operations as op
 
-# Check the TF version to check if legacy-mode is needed (TF < 2.2)
-tf_version = tf.__version__.split(".")
-if int(tf_version[0]) == 2 and int(tf_version[1]) < 2:
-    raise NotImplementedError("n3fit needs TF > 2.2 in order to work")
-
-
 # We need a function to transform tensors to numpy/python primitives
 # which is not part of the official TF interface and can change with the version
 if hasattr(tf_utils, "to_numpy_or_python_type"):
@@ -414,6 +408,17 @@ def load_identical_replicas(self, model_file):
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)
 
+    def save_weights(self, file, save_format="h5"):
+        """
+        Compatibility function for tf < 2.16
+        """
+        try:
+            super().save_weights(file, save_format=save_format)
+        except TypeError:
+            new_file = file.with_suffix(".weights.h5")
+            super().save_weights(new_file)
+            shutil.move(new_file, file)
+
 
 def is_stacked_single_replicas(layer):
     """
diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
index 7349d6be36..c72ea9fb5c 100644
--- a/n3fit/src/n3fit/backends/keras_backend/callbacks.py
+++ b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -10,9 +10,10 @@
 
 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard
 
 log = logging.getLogger(__name__)
 
@@ -30,7 +31,7 @@ def __init__(self, count_range=100):
         self.last_time = 0
 
     def on_epoch_end(self, epoch, logs=None):
-        """ At the end of every epoch it checks the time """
+        """At the end of every epoch it checks the time"""
         new_time = time()
         if epoch == 0:
             # The first epoch is only useful for starting
@@ -45,13 +46,13 @@ def on_epoch_end(self, epoch, logs=None):
         self.last_time = new_time
 
     def on_train_end(self, logs=None):
-        """ Print the results """
+        """Print the results"""
         total_time = time() - self.starting_time
         n_times = len(self.all_times)
         # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code
         # by epoch 100 all parts of the code have usually been called so it's a good compromise
-        mean = np.mean(self.all_times[min(110, n_times-1):])
-        std = np.std(self.all_times[min(110, n_times-1):])
+        mean = np.mean(self.all_times[min(110, n_times - 1) :])
+        std = np.std(self.all_times[min(110, n_times - 1) :])
         log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s")
         log.info(f"> > > Total time: {total_time/60:.5} min")
 
@@ -75,9 +76,15 @@ def __init__(self, stopping_object, log_freq=100):
         super().__init__()
         self.log_freq = log_freq
         self.stopping_object = stopping_object
+        self._current_loss = None
+
+    def on_epoch_begin(self, epoch, logs=None):
+        # TODO This is an unnecessary performance hit, just for testing
+        self._current_loss = self.model.compute_losses()
 
     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
+        logs = self._current_loss
         print_stats = ((epoch + 1) % self.log_freq) == 0
         # Note that the input logs correspond to the fit before the weights are updated
         self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats)
@@ -117,7 +124,7 @@ def __init__(self, datasets, multipliers, update_freq=100):
         self.updateable_weights = []
 
     def on_train_begin(self, logs=None):
-        """ Save an instance of all relevant layers """
+        """Save an instance of all relevant layers"""
         for layer_name in self.datasets:
             layer = self.model.get_layer(layer_name)
             self.updateable_weights.append(layer.weights)
@@ -133,7 +140,7 @@ def _update_weights(self):
                 w.assign(w * multiplier)
 
     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
         if (epoch + 1) % self.update_freq == 0:
             self._update_weights()
 
diff --git a/n3fit/src/n3fit/backends/keras_backend/constraints.py b/n3fit/src/n3fit/backends/keras_backend/constraints.py
index b186cd2638..5b1bd8d413 100644
--- a/n3fit/src/n3fit/backends/keras_backend/constraints.py
+++ b/n3fit/src/n3fit/backends/keras_backend/constraints.py
@@ -3,8 +3,8 @@
 """
 
 import tensorflow as tf
-from tensorflow.keras.constraints import MinMaxNorm
 from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import MinMaxNorm
 
 
 class MinMaxWeight(MinMaxNorm):
@@ -14,15 +14,11 @@ class MinMaxWeight(MinMaxNorm):
     """
 
     def __init__(self, min_value, max_value, **kwargs):
-        super(MinMaxWeight, self).__init__(
-            min_value=min_value, max_value=max_value, **kwargs
-        )
+        super(MinMaxWeight, self).__init__(min_value=min_value, max_value=max_value, **kwargs)
 
-    @tf.function
     def __call__(self, w):
         norms = K.sum(w, axis=self.axis, keepdims=True)
         desired = (
-            self.rate * K.clip(norms, self.min_value, self.max_value)
-            + (1 - self.rate) * norms
+            self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms
         )
         return w * desired / (K.epsilon() + norms)
diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
index 9fa0ac5835..3b2037c47e 100644
--- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -99,8 +99,8 @@ def call(self, inputs):
         If the input already contains multiple replica outputs, it is equivalent
         to applying each replica to its corresponding input.
         """
-        if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype:
-            inputs = tf.cast(inputs, dtype=self._compute_dtype_object)
+        # cast always
+        inputs = tf.cast(inputs, dtype=self.compute_dtype)
 
         outputs = self.matmul(inputs)
 
diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py
index 12d16b0d73..c73a548b53 100644
--- a/n3fit/src/n3fit/backends/keras_backend/operations.py
+++ b/n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -22,8 +22,10 @@
     Note that tensor operations can also be applied to layers as the output of a layer is a tensor
     equally operations are automatically converted to layers when used as such.
 """
+
 from typing import Optional
 
+import keras
 import numpy as np
 import numpy.typing as npt
 import tensorflow as tf
@@ -249,11 +251,15 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None):
     Concatenates a list of numbers or tensor into a bigger tensor
     If the target shape is given, the output is reshaped to said shape
     """
-    concatenated_tensor = tf.concat(tensor_list, axis, name=name)
-    if target_shape:
-        return K.reshape(concatenated_tensor, target_shape)
-    else:
+    try:
+        # For tensorflow >= 2.16, Keras >= 3
+        concatenated_tensor = keras.ops.concatenate(tensor_list, axis=axis)
+    except AttributeError:
+        concatenated_tensor = tf.concat(tensor_list, axis=axis)
+
+    if target_shape is None:
         return concatenated_tensor
+    return K.reshape(concatenated_tensor, target_shape)
 
 
 def einsum(equation, *args, **kwargs):
diff --git a/n3fit/src/n3fit/hyper_optimization/hyper_scan.py b/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
index 174e921677..f06234ff6a 100644
--- a/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
+++ b/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
@@ -16,6 +16,7 @@
 import logging
 
 import hyperopt
+from hyperopt.pyll.base import scope
 import numpy as np
 
 from n3fit.backends import MetaLayer, MetaModel
@@ -36,7 +37,7 @@ def hp_uniform(key, lower_end, higher_end):
     return hyperopt.hp.uniform(key, lower_end, higher_end)
 
 
-def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None):
+def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None, make_int=False):
     """Like uniform but admits a step_size"""
     if lower_end is None or higher_end is None:
         return None
@@ -44,7 +45,11 @@ def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None):
         step_size = lower_end
     if steps:
         step_size = (higher_end - lower_end) / steps
-    return hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+
+    ret = hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+    if make_int:
+        ret = scope.int(ret)
+    return ret
 
 
 def hp_loguniform(key, lower_end, higher_end):
@@ -276,7 +281,7 @@ def stopping(self, min_epochs=None, max_epochs=None, min_patience=None, max_pati
         stopping_key = "stopping_patience"
 
         if min_epochs is not None and max_epochs is not None:
-            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1)
+            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1, make_int=True)
             self._update_param(epochs_key, epochs)
 
         if min_patience is not None or max_patience is not None:
@@ -429,7 +434,9 @@ def architecture(
             units = []
             for i in range(n):
                 units_label = "nl{0}:-{1}/{0}".format(n, i)
-                units_sampler = hp_quniform(units_label, min_units, max_units, step_size=1)
+                units_sampler = hp_quniform(
+                    units_label, min_units, max_units, step_size=1, make_int=True
+                )
                 units.append(units_sampler)
             # The number of nodes in the last layer are read from the runcard
             units.append(output_size)
diff --git a/n3fit/src/n3fit/layers/observable.py b/n3fit/src/n3fit/layers/observable.py
index 2a5dd9a93e..b1a2d701b3 100644
--- a/n3fit/src/n3fit/layers/observable.py
+++ b/n3fit/src/n3fit/layers/observable.py
@@ -81,9 +81,6 @@ def build(self, input_shape):
 
         super().build(input_shape)
 
-    def compute_output_shape(self, input_shape):
-        return (self.output_dim, None)
-
     def call(self, pdf):
         """
         This function perform the convolution with the fktable and one (DIS) or two (DY-like) pdfs.
@@ -111,6 +108,7 @@ def call(self, pdf):
         observables = self.operation(observables)
         return observables
 
+    # Overridables
     @abstractmethod
     def gen_mask(self, basis):
         pass
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index 219bbdfc11..8c4c30c59a 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -9,6 +9,7 @@
 
 
 """
+
 from dataclasses import dataclass
 from typing import Callable, List
 
@@ -73,7 +74,9 @@ def _generate_loss(self, mask=None):
         if self.invcovmat is not None:
             if self.rotation:
                 # If we have a matrix diagonal only, padd with 0s and hope it's not too heavy on memory
-                invcovmat_matrix = np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                invcovmat_matrix = (
+                    np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                )
                 if self.covmat is not None:
                     covmat_matrix = np.eye(self.covmat.shape[-1]) * self.covmat[..., np.newaxis]
                 else:
@@ -82,11 +85,7 @@ def _generate_loss(self, mask=None):
                 covmat_matrix = self.covmat
                 invcovmat_matrix = self.invcovmat
             loss = losses.LossInvcovmat(
-                invcovmat_matrix,
-                self.data,
-                mask,
-                covmat=covmat_matrix,
-                name=self.name
+                invcovmat_matrix, self.data, mask, covmat=covmat_matrix, name=self.name
             )
         elif self.positivity:
             loss = losses.LossPositivity(name=self.name, c=self.multiplier)
@@ -642,9 +641,10 @@ def compute_unnormalized_pdf(x):
 
         if photons:
             # add batch and flavor dimensions
-            photon_integrals = op.batchit(op.batchit(photons.integral))
+            ph_tensor = op.numpy_to_tensor(photons.integral)
+            photon_integrals = op.batchit(op.batchit(ph_tensor))
         else:
-            photon_integrals = np.zeros((1, num_replicas, 1))
+            photon_integrals = op.numpy_to_tensor(np.zeros((1, num_replicas, 1)))
 
         PDFs_normalized = sumrule_layer(
             {
@@ -737,7 +737,7 @@ def layer_generator(i_layer, nodes_out, activation):
                 layer = base_layer_selector(
                     layer_type,
                     kernel_initializer=initializers,
-                    units=nodes_out,
+                    units=int(nodes_out),
                     activation=activation,
                     input_shape=(nodes_in,),
                     basis_size=basis_size,
@@ -755,7 +755,7 @@ def layer_generator(i_layer, nodes_out, activation):
                 layer_type,
                 replica_seeds=replica_seeds,
                 kernel_initializer=MetaLayer.select_initializer(initializer_name, seed=i_layer),
-                units=nodes_out,
+                units=int(nodes_out),
                 activation=activation,
                 is_first_layer=(i_layer == 0),
                 regularizer=reg,
diff --git a/n3fit/src/n3fit/tests/test_hyperopt.py b/n3fit/src/n3fit/tests/test_hyperopt.py
index cecc747452..f53377b083 100644
--- a/n3fit/src/n3fit/tests/test_hyperopt.py
+++ b/n3fit/src/n3fit/tests/test_hyperopt.py
@@ -32,13 +32,21 @@ def load_data(info_file):
 
 
 def test_restart_from_pickle(tmp_path):
-    """Ensure that our hyperopt restart works as expected"""
+    """Ensure that after a hyperopt restart, the testing continues
+    from the same point.
+    The test is set up so that it does one trial, then stops, then a second one
+    And then this is compared with two trials one after the other.
+
+    The test checks that the starting point of the second trial is the same in both cases
+    """
     # Prepare the run
     quickcard = f"hyper-{QUICKNAME}.yml"
     quickpath = REGRESSION_FOLDER / quickcard
-    # Set up some options
-    n_trials_stop = 2
-    n_trials_total = 4
+
+    # Set the test up so that it does one trial, then stops, then does another one,
+    # and separately a direct run of two trials in a row to compare against
+    n_trials_stop = 1
+    n_trials_total = 2
     output_restart = tmp_path / f"run_{n_trials_stop}_trials_and_then_{n_trials_total}_trials"
     output_direct = tmp_path / f"run_{n_trials_total}_trials"
 
@@ -46,7 +54,7 @@ def test_restart_from_pickle(tmp_path):
     shutil.copy(quickpath, tmp_path)
     # run some trials for the first time
     sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} " f"-o {output_restart}".split(),
+        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} -o {output_restart}".split(),
         cwd=tmp_path,
         check=True,
     )
@@ -78,4 +86,5 @@ def test_restart_from_pickle(tmp_path):
         assert restart_json[i]['misc'] == direct_json[i]['misc']
         assert restart_json[i]['state'] == direct_json[i]['state']
         assert restart_json[i]['tid'] == direct_json[i]['tid']
-        assert restart_json[i]['result'] == direct_json[i]['result']
+        assert restart_json[i]['misc']['idxs'] == direct_json[i]['misc']['idxs']
+    # Note that it doesn't check the final loss of the second trial
diff --git a/n3fit/src/n3fit/tests/test_multidense.py b/n3fit/src/n3fit/tests/test_multidense.py
index 6c7df89e3a..e2a8ddc412 100644
--- a/n3fit/src/n3fit/tests/test_multidense.py
+++ b/n3fit/src/n3fit/tests/test_multidense.py
@@ -15,20 +15,21 @@ def test_multidense():
                 units=8,
                 replica_seeds=[42, 43],
                 is_first_layer=True,
-                kernel_initializer=GlorotUniform(seed=0),
+                kernel_initializer=GlorotUniform(seed=5),
             ),
             MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=100)),
         ]
     )
-    single_models = [
-        Sequential(
-            [
-                Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
-                Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)),
-            ]
+    single_models = []
+    for r in range(replicas):
+        single_models.append(
+            Sequential(
+                [
+                    Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r + 5)),
+                    Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)),
+                ]
+            )
         )
-        for r in range(replicas)
-    ]
 
     gridsize, features = 100, 3
     multi_dense_model.build(input_shape=(None, gridsize, features))
@@ -46,12 +47,17 @@ def test_multidense():
 
 def test_initializers():
     input_shape = (None, 3, 1)
-    dense_layers = []
+    dense_weights = []
     for r in range(2):
         dense_layer = Dense(units=2, kernel_initializer=GlorotUniform(seed=42 + r))
         dense_layer.build(input_shape=input_shape)
-        dense_layers.append(dense_layer)
-    stacked_weights = tf.stack([dense_layer.weights[0] for dense_layer in dense_layers], axis=0)
+        try:
+            dense_weights.append(dense_layer.weights[0].value.numpy())
+        except AttributeError:
+            # In tensorflow < 2.16, value was a function
+            dense_weights.append(dense_layer.weights[0].value().numpy())
+
+    stacked_weights = np.stack(dense_weights, axis=0)
 
     multi_dense_layer = MultiDense(
         units=2,
@@ -62,6 +68,5 @@ def test_initializers():
     multi_dense_layer.build(input_shape=input_shape)
 
     multi_dense_weights = multi_dense_layer.weights[0].numpy()
-    stacked_weights = stacked_weights.numpy()
 
     np.testing.assert_allclose(multi_dense_weights, stacked_weights)

From 9e6dd2cc780eb93548cd805a563c10ea0acc90ef Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Tue, 5 Mar 2024 15:43:53 +0100
Subject: [PATCH 4/4] change the per-100-epochs monitoring of chi2 to avoid
 having to recompute losses

---
 .../n3fit/backends/keras_backend/MetaModel.py | 10 ++-
 .../n3fit/backends/keras_backend/callbacks.py | 12 ++--
 .../backends/keras_backend/operations.py      | 14 ++--
 n3fit/src/n3fit/stopping.py                   | 72 +++++++++----------
 4 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
index 2956a25d2e..0169019d18 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -410,12 +410,18 @@ def load_identical_replicas(self, model_file):
 
     def save_weights(self, file, save_format="h5"):
         """
-        Compatibility function for tf < 2.16
+        Compatibility function for:
+            - tf < 2.16, keras < 3: the ``save_format`` argument is needed for h5
+            - tf >= 2.16, keras >= 3: the save format is deduced from the file extension
+        In both cases, the weights end up saved at the ``file`` path.
         """
         try:
+            # Keras 2, tf < 2.16
             super().save_weights(file, save_format=save_format)
         except TypeError:
-            new_file = file.with_suffix(".weights.h5")
+            # Newer versions of keras (>=3) drop the ``save_format`` argument
+            # and instead take the format from the extension of the file
+            new_file = file.with_suffix(f".weights.{save_format}")
             super().save_weights(new_file)
             shutil.move(new_file, file)
 
diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
index c72ea9fb5c..05087dd640 100644
--- a/n3fit/src/n3fit/backends/keras_backend/callbacks.py
+++ b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -76,15 +76,13 @@ def __init__(self, stopping_object, log_freq=100):
         super().__init__()
         self.log_freq = log_freq
         self.stopping_object = stopping_object
-        self._current_loss = None
-
-    def on_epoch_begin(self, epoch, logs=None):
-        # TODO This is an unnecessary performance hit, just for testing
-        self._current_loss = self.model.compute_losses()
 
     def on_epoch_end(self, epoch, logs=None):
-        """Function to be called at the end of every epoch"""
-        logs = self._current_loss
+        """Function to be called at the end of every epoch
+        Every ``log_freq`` number of epochs, the ``monitor_chi2`` method of the ``stopping_object``
+        will be called and the validation loss (broken down by experiment) will be logged.
+        For the training model only the total loss is logged during the training.
+        """
         print_stats = ((epoch + 1) % self.log_freq) == 0
         # Note that the input logs correspond to the fit before the weights are updated
         self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats)
diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py
index c73a548b53..844274379c 100644
--- a/n3fit/src/n3fit/backends/keras_backend/operations.py
+++ b/n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -37,6 +37,14 @@
 
 from validphys.convolution import OP
 
+# Select a concatenate function depending on the tensorflow version
+try:
+    # For tensorflow >= 2.16, Keras >= 3
+    concatenate_function = keras.ops.concatenate
+except AttributeError:
+    # keras.ops was introduced in keras 3
+    concatenate_function = tf.concat
+
 
 def evaluate(tensor):
     """Evaluate input tensor using the backend"""
@@ -251,11 +259,7 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None):
     Concatenates a list of numbers or tensor into a bigger tensor
     If the target shape is given, the output is reshaped to said shape
     """
-    try:
-        # For tensorflow >= 2.16, Keras >= 3
-        concatenated_tensor = keras.ops.concatenate(tensor_list, axis=axis)
-    except AttributeError:
-        concatenated_tensor = tf.concat(tensor_list, axis=axis)
+    concatenated_tensor = concatenate_function(tensor_list, axis=axis)
 
     if target_shape is None:
         return concatenated_tensor
diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py
index 380c5445d7..dec9236b55 100644
--- a/n3fit/src/n3fit/stopping.py
+++ b/n3fit/src/n3fit/stopping.py
@@ -146,13 +146,16 @@ class FitState:
             all losses for the training model
         validation_info: dict
             all losses for the validation model
+        training_loss: float
+            total training loss, this can be given if the per-experiment
+            ``training_info`` is not available
     """
 
     vl_ndata = None
     tr_ndata = None
     vl_suffix = None
 
-    def __init__(self, training_info, validation_info):
+    def __init__(self, training_info, validation_info, training_loss=None):
         if self.vl_ndata is None or self.tr_ndata is None or self.vl_suffix is None:
             raise ValueError(
                 "FitState cannot be instantiated until vl_ndata, tr_ndata and vl_suffix are filled"
@@ -164,6 +167,8 @@ def __init__(self, training_info, validation_info):
         self._tr_chi2 = None  # This is an overall training chi2
         self._vl_dict = None
         self._tr_dict = None
+        # This can be given if ``training_info`` is not given
+        self._training_loss = training_loss
 
     @property
     def vl_loss(self):
@@ -173,6 +178,8 @@ def vl_loss(self):
     @property
     def tr_loss(self):
         """Return the total validation loss as it comes from the info dictionaries"""
+        if self._training is None:
+            return self._training_loss
         return self._training.get("loss")
 
     def _parse_chi2(self):
@@ -223,7 +230,7 @@ def total_partial_tr_chi2(self):
 
     def total_partial_vl_chi2(self):
         """Return the vl chi2 summed over replicas per experiment"""
-        return {k: np.sum(v) for k, v in self.all_tr_chi2.items()}
+        return {k: np.sum(v) for k, v in self.all_vl_chi2.items()}
 
     def total_tr_chi2(self):
         """Return the total tr chi2 summed over replicas"""
@@ -273,27 +280,12 @@ def get_state(self, epoch):
                 f"Tried to get obtain the state for epoch {epoch} when only {len(self._history)} epochs have been saved"
             ) from e
 
-    def register(self, epoch, training_info, validation_info):
-        """Save a new fitstate and updates the current final epoch
-
-        Parameters
-        ----------
-            epoch: int
-                the current epoch of the fit
-            training_info: dict
-                all losses for the training model
-            validation_info: dict
-                all losses for the validation model
-
-        Returns
-        -------
-            FitState
+    def register(self, epoch, fitstate):
+        """Save the current fitstate and the associated epoch
+        and set the current epoch as the final one should the fit end now
         """
-        # Save all the information in a fitstate object
-        fitstate = FitState(training_info, validation_info)
         self.final_epoch = epoch
         self._history.append(fitstate)
-        return fitstate
 
 
 class Stopping:
@@ -425,8 +417,8 @@ def monitor_chi2(self, training_info, epoch, print_stats=False):
         Parameters
         ----------
             training_info: dict
-                output of a .fit() call, dictionary of the total loss (summed over replicas) for
-                each experiment
+                output of a .fit() call, dictionary of the total training loss
+                (summed over replicas and experiments)
             epoch: int
                 index of the epoch
 
@@ -436,7 +428,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False):
                 true/false according to the status of the run
         """
         # Step 1. Check whether the fit has NaN'd and stop it if so
-        if np.isnan(training_info["loss"]):
+        if np.isnan(training_loss := training_info["loss"]):
             log.warning(" > NaN found, stopping activated")
             self.make_stop()
             return False
@@ -445,7 +437,9 @@ def monitor_chi2(self, training_info, epoch, print_stats=False):
         validation_info = self._validation.compute_losses()
 
         # Step 3. Register the current point in (the) history
-        fitstate = self._history.register(epoch, training_info, validation_info)
+        # and mark this epoch as the final one reached so far
+        fitstate = FitState(None, validation_info, training_loss)
+        self._history.register(epoch, fitstate)
         if print_stats:
             self.print_current_stats(epoch, fitstate)
 
@@ -496,21 +490,23 @@ def _restore_best_weights(self):
 
     def print_current_stats(self, epoch, fitstate):
         """
-        Prints ``fitstate`` training and validation chi2s
+        Prints ``fitstate`` validation chi2 for every experiment
+        and the current total training loss as well as the validation loss
+        after the training step
         """
         epoch_index = epoch + 1
-        tr_chi2 = fitstate.total_tr_chi2()
         vl_chi2 = fitstate.total_vl_chi2()
-        total_str = f"At epoch {epoch_index}/{self.total_epochs}, total chi2: {tr_chi2}\n"
+        total_str = f"""Epoch {epoch_index}/{self.total_epochs}: loss: {fitstate.tr_loss:.7f}
+Validation loss after training step: {vl_chi2:.7f}.
+Validation chi2s: """
 
         # The partial chi2 makes no sense for more than one replica at once:
         if self._n_replicas == 1:
-            partial_tr_chi2 = fitstate.total_partial_tr_chi2()
+            partial_vl_chi2 = fitstate.total_partial_vl_chi2()
             partials = []
-            for experiment, chi2 in partial_tr_chi2.items():
+            for experiment, chi2 in partial_vl_chi2.items():
                 partials.append(f"{experiment}: {chi2:.3f}")
-            total_str += ", ".join(partials) + "\n"
-        total_str += f"Validation chi2 at this point: {vl_chi2}"
+            total_str += ", ".join(partials)
         log.info(total_str)
 
     def stop_here(self):
@@ -525,6 +521,7 @@ def stop_here(self):
     def chi2exps_json(self, i_replica=0, log_each=100):
         """
         Returns and apt-for-json dictionary with the status of the fit every `log_each` epochs
+        Reports the total training loss, the validation loss and per-experiment validation chi2s.
 
         Parameters
         ----------
@@ -543,16 +540,14 @@ def chi2exps_json(self, i_replica=0, log_each=100):
 
         for epoch in range(log_each - 1, final_epoch + 1, log_each):
             fitstate = self._history.get_state(epoch)
-            all_tr = fitstate.all_tr_chi2_for_replica(i_replica)
-            all_vl = fitstate.all_vl_chi2_for_replica(i_replica)
+            # Get the training and validation losses
+            tmp = {"training_loss": fitstate.tr_loss, "validation_loss": fitstate.vl_loss.tolist()}
 
-            tmp = {exp: {"training": tr_chi2} for exp, tr_chi2 in all_tr.items()}
-            for exp, vl_chi2 in all_vl.items():
-                if exp not in tmp:
-                    tmp[exp] = {"training": None}
-                tmp[exp]["validation"] = vl_chi2
+            # And the validation chi2 broken down by experiment
 
+            tmp["validation_chi2s"] = fitstate.all_vl_chi2_for_replica(i_replica)
             json_dict[epoch + 1] = tmp
+
         return json_dict
 
 
@@ -586,6 +581,7 @@ def check_positivity(self, history_object):
                 otherwise, it passes.
                 It returns an array booleans which are True if positivity passed
         story_object[key_loss] < self.threshold
+
                 Parameters
                 ----------
                     history_object: dict