diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5 index 542fca06f9..2749393b42 100644 Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py index f49bbd0f53..3c48317a86 100644 --- a/n3fit/src/n3fit/backends/__init__.py +++ b/n3fit/src/n3fit/backends/__init__.py @@ -1,20 +1,23 @@ -from n3fit.backends.keras_backend.internal_state import ( - set_initial_state, - clear_backend_state, - set_eager -) +from n3fit.backends.keras_backend import callbacks, constraints, operations from n3fit.backends.keras_backend.MetaLayer import MetaLayer -from n3fit.backends.keras_backend.MetaModel import MetaModel +from n3fit.backends.keras_backend.MetaModel import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + MetaModel, +) from n3fit.backends.keras_backend.base_layers import ( + Concatenate, Input, - concatenate, Lambda, base_layer_selector, + concatenate, regularizer_selector, - Concatenate, ) -from n3fit.backends.keras_backend import operations -from n3fit.backends.keras_backend import constraints -from n3fit.backends.keras_backend import callbacks +from n3fit.backends.keras_backend.internal_state import ( + clear_backend_state, + set_eager, + set_initial_state, +) print("Using Keras backend") diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 1b0990bb03..48b4e7e3aa 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -5,6 +5,7 @@ backend-dependent calls. """ +import logging import re import h5py @@ -16,6 +17,8 @@ import n3fit.backends.keras_backend.operations as op +log = logging.getLogger(__name__) + # Check the TF version to check if legacy-mode is needed (TF < 2.2) tf_version = tf.__version__.split(".") if int(tf_version[0]) == 2 and int(tf_version[1]) < 2: @@ -46,7 +49,8 @@ } NN_PREFIX = "NN" -PREPROCESSING_PREFIX = "preprocessing_factor" +NN_LAYER_ALL_REPLICAS = "all_NNs" +PREPROCESSING_LAYER_ALL_REPLICAS = "preprocessing_factor" # Some keys need to work for everyone for k, v in optimizers.items(): @@ -156,7 +160,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): of the model (the loss functions) to the partial losses. If the model was compiled with input and output data, they will not be passed through. 
-            In this case by default the number of `epochs` will be set to 1
+            In this case by default the number of ``epochs`` will be set to 1
 
             ex: {'loss': [100], 'dataset_a_loss1' : [67], 'dataset_2_loss': [33]}
@@ -169,10 +173,36 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs):
         x_params = self._parse_input(x)
         if y is None:
             y = self.target_tensors
-        history = super().fit(x=x_params, y=y, epochs=epochs, **kwargs)
+
+        # Avoids Tensorflow overhead that happens at every epoch by putting multiple steps in an epoch
+        steps_per_epoch = self.determine_steps_per_epoch(epochs)
+
+        for k, v in x_params.items():
+            x_params[k] = tf.repeat(v, steps_per_epoch, axis=0)
+        y = [tf.repeat(yi, steps_per_epoch, axis=0) for yi in y]
+
+        history = super().fit(
+            x=x_params, y=y, epochs=epochs // steps_per_epoch, batch_size=1, **kwargs
+        )
         loss_dict = history.history
         return loss_dict
 
+    def determine_steps_per_epoch(self, epochs):
+        num_replicas = self.output_shape[0][0]
+        # in this case we're most likely running on the CPU and this is not worth it
+        if num_replicas == 1:
+            return 1
+
+        # On the GPU, pack as many steps as possible into each epoch (up to 100),
+        # while keeping the requested number of epochs divisible by the number of steps
+        for divisor in [10, 100]:
+            if epochs % divisor != 0:
+                steps_per_epoch = divisor // 10
+                log.warning(
+                    f"Epochs {epochs} not divisible by {divisor}, using {steps_per_epoch} steps per epoch"
+                )
+                return steps_per_epoch
+        return 100
+
     def predict(self, x=None, **kwargs):
         """Call super().predict with the right input arguments"""
         x = self._parse_input(x)
@@ -198,10 +228,15 @@ def compute_losses(self):
         out_names = [f"{i}_loss" for i in self.output_names]
         out_names.insert(0, "loss")
 
+        inputs = self._parse_input(None)
+        # get rid of the repetitions by number of epochs made in perform_fit
+        for k, v in inputs.items():
+            inputs[k] = v[:1]
+
         # Compile an evaluation function
         @tf.function
         def losses_fun():
-            predictions = self(self._parse_input(None))
+            predictions = self(inputs)
             # If we only have one dataset the output changes
             if len(out_names) == 2:
                 predictions = [predictions]
@@ -228,7 +263,7 @@ def compile(
     ):
         """
         Compile the model given an optimizer and a list of loss functions.
-        The optimizer must be one of those implemented in the `optimizer` attribute of this class.
+        The optimizer must be one of those implemented in the ``optimizer`` attribute of this class.
 
         Options:
             - A learning rate and a list of target output can be defined.
@@ -353,14 +388,10 @@ def get_replica_weights(self, i_replica):
             dict
                 dictionary with the weights of the replica
         """
-        NN_weights = [
-            tf.Variable(w, name=w.name) for w in self.get_layer(f"{NN_PREFIX}_{i_replica}").weights
-        ]
-        prepro_weights = [
-            tf.Variable(w, name=w.name)
-            for w in self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").weights
-        ]
-        weights = {NN_PREFIX: NN_weights, PREPROCESSING_PREFIX: prepro_weights}
+        weights = {}
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            weights[layer_type] = get_layer_replica_weights(layer, i_replica)
 
         return weights
 
@@ -378,10 +409,9 @@ def set_replica_weights(self, weights, i_replica=0):
         i_replica: int
             the replica number to set, defaulting to 0
         """
-        self.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights[NN_PREFIX])
-        self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").set_weights(
-            weights[PREPROCESSING_PREFIX]
-        )
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            set_layer_replica_weights(layer=layer, weights=weights[layer_type], i_replica=i_replica)
 
     def split_replicas(self):
         """
@@ -411,51 +441,84 @@ def load_identical_replicas(self, model_file):
         """
         From a single replica model, load the same weights into all replicas.
         """
-        weights = self._format_weights_from_file(model_file)
+        single_replica = self.single_replica_generator()
+        single_replica.load_weights(model_file)
+        weights = single_replica.get_replica_weights(0)
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)
 
-    def _format_weights_from_file(self, model_file):
-        """Read weights from a .h5 file and format into a dictionary of tf.Variables"""
-        weights = {}
-        with h5py.File(model_file, 'r') as f:
-            # look at layers of the form NN_i and take the lowest i
-            i_replica = 0
-            while f"{NN_PREFIX}_{i_replica}" not in f:
-                i_replica += 1
+def is_stacked_single_replicas(layer):
+    """
+    Check if the layer consists of stacked single replicas (only happens for NN layers),
+    to determine how to extract single replica weights.
 
-            weights[NN_PREFIX] = self._extract_weights(
-                f[f"{NN_PREFIX}_{i_replica}"], NN_PREFIX, i_replica
-            )
-            weights[PREPROCESSING_PREFIX] = self._extract_weights(
-                f[f"{PREPROCESSING_PREFIX}_{i_replica}"], PREPROCESSING_PREFIX, i_replica
-            )
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to check
 
-        return weights
+    Returns
+    -------
+    bool
+        True if the layer consists of stacked single replicas
+    """
+    if not isinstance(layer, MetaModel):
+        return False
+    return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers]
 
-    def _extract_weights(self, h5_group, weights_key, i_replica):
-        """Extract weights from a h5py group, turning them into Tensorflow variables"""
-        weights = []
 
-        def append_weights(name, node):
-            if isinstance(node, h5py.Dataset):
-                weight_name = node.name.split("/", 2)[-1]
-                weight_name = weight_name.replace(f"{NN_PREFIX}_{i_replica}", f"{NN_PREFIX}_0")
-                weight_name = weight_name.replace(
-                    f"{PREPROCESSING_PREFIX}_{i_replica}", f"{PREPROCESSING_PREFIX}_0"
-                )
-                weights.append(tf.Variable(node[()], name=weight_name))
 
+def get_layer_replica_weights(layer, i_replica: int):
+    """
+    Get the weights for the given single replica ``i_replica``,
+    from a ``layer`` that has weights for all replicas.
+
+    Note that the layer could be a complete NN with many separate sub-layers,
+    each of which contains weights for all replicas together.
+    This function separates the per-replica weights and returns the list of weights as if the
+    input ``layer`` were made of _only_ replica ``i_replica``.
+
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to get the weights from
+    i_replica: int
+        the replica number
+
+    Returns
+    -------
+    weights: list
+        list of weights for the replica
+    """
+    if is_stacked_single_replicas(layer):
+        weights = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
+    else:
+        weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights]
 
-        h5_group.visititems(append_weights)
+    return weights
+
+
+def set_layer_replica_weights(layer, weights, i_replica: int):
+    """
+    Set the weights for the given single replica ``i_replica``.
+    When the input ``layer`` contains weights for many replicas, ensures that
+    only those corresponding to replica ``i_replica`` are updated.
+
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to set the weights for
+    weights: list
+        list of weights for the replica
+    i_replica: int
+        the replica number
+    """
+    if is_stacked_single_replicas(layer):
+        layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights)
+        return
 
-        # have to put them in the same order
-        weights_ordered = []
-        weights_model_order = [w.name for w in self.get_replica_weights(0)[weights_key]]
-        for w in weights_model_order:
-            for w_h5 in weights:
-                if w_h5.name == w:
-                    weights_ordered.append(w_h5)
+    full_weights = [w.numpy() for w in layer.weights]
+    for w_old, w_new in zip(full_weights, weights):
+        w_old[i_replica : i_replica + 1] = w_new
 
-        return weights_ordered
+    layer.set_weights(full_weights)
diff --git a/n3fit/src/n3fit/backends/keras_backend/base_layers.py b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
index 14215e9cf6..36a2016fee 100644
--- a/n3fit/src/n3fit/backends/keras_backend/base_layers.py
+++ b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
@@ -17,34 +17,44 @@ The names of the layer and the activation function are the ones to be used in the n3fit runcard.
""" -from tensorflow.keras.layers import Lambda, LSTM, Dropout, Concatenate -from tensorflow.keras.layers import concatenate, Input # pylint: disable=unused-import +from tensorflow import expand_dims, math, nn +from tensorflow.keras.layers import ( # pylint: disable=unused-import + Dropout, + Input, + Lambda, + concatenate, +) from tensorflow.keras.layers import Dense as KerasDense -from tensorflow import expand_dims +from tensorflow.keras.layers import LSTM, Concatenate # pylint: disable=unused-import from tensorflow.keras.regularizers import l1_l2 -from tensorflow import nn, math from n3fit.backends import MetaLayer +from n3fit.backends.keras_backend.multi_dense import MultiDense + # Custom activation functions def square_activation(x): - """ Squares the input """ - return x*x + """Squares the input""" + return x * x + def modified_tanh(x): - """ A non-saturating version of the tanh function """ - return math.abs(x)*nn.tanh(x) + """A non-saturating version of the tanh function""" + return math.abs(x) * nn.tanh(x) + def leaky_relu(x): - """ Computes the Leaky ReLU activation function """ + """Computes the Leaky ReLU activation function""" return nn.leaky_relu(x, alpha=0.2) + custom_activations = { - "square" : square_activation, + "square": square_activation, "leaky_relu": leaky_relu, "modified_tanh": modified_tanh, } + def LSTM_modified(**kwargs): """ LSTM asks for a sample X timestep X features kind of thing so we need to reshape the input @@ -61,9 +71,11 @@ def ReshapedLSTM(input_tensor): return ReshapedLSTM + class Dense(KerasDense, MetaLayer): pass + def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_kwargs): """ Generates a list of layers which can take as an input either one single layer @@ -85,7 +97,7 @@ def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_ # Need to generate a list of dense layers dense_basis = [ - base_layer_selector("dense", kernel_initializer=initializer, **dense_kwargs) + base_layer_selector("single_dense", kernel_initializer=initializer, **dense_kwargs) for initializer in kernel_initializer ] @@ -116,13 +128,26 @@ def apply_dense(xinput): layers = { "dense": ( + MultiDense, + { + "input_shape": (1,), + "replica_seeds": None, + "kernel_initializer": "glorot_normal", + "units": 5, + "activation": "sigmoid", + "kernel_regularizer": None, + "replica_input": True, + }, + ), + # This one is only used inside dense_per_flavour + "single_dense": ( Dense, { "input_shape": (1,), "kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid", - "kernel_regularizer": None + "kernel_regularizer": None, }, ), "dense_per_flavour": ( @@ -143,31 +168,28 @@ def apply_dense(xinput): "concatenate": (Concatenate, {}), } -regularizers = { - 'l1_l2': (l1_l2, {'l1': 0., 'l2': 0.}) - } +regularizers = {'l1_l2': (l1_l2, {'l1': 0.0, 'l2': 0.0})} + def base_layer_selector(layer_name, **kwargs): """ - Given a layer name, looks for it in the `layers` dictionary and returns an instance. + Given a layer name, looks for it in the `layers` dictionary and returns an instance. 
-    The layer dictionary defines a number of defaults
-    but they can be overwritten/enhanced through kwargs
+    The layer dictionary defines a number of defaults
+    but they can be overwritten/enhanced through kwargs
 
-    Parameters
-    ----------
-        `layer_name
-            str with the name of the layer
-        `**kwargs`
-            extra optional arguments to pass to the layer (beyond their defaults)
+    Parameters
+    ----------
+    `layer_name`
+        str with the name of the layer
+    `**kwargs`
+        extra optional arguments to pass to the layer (beyond their defaults)
     """
     try:
         layer_tuple = layers[layer_name]
     except KeyError as e:
         raise NotImplementedError(
-            "Layer not implemented in keras_backend/base_layers.py: {0}".format(
-                layer_name
-            )
+            "Layer not implemented in keras_backend/base_layers.py: {0}".format(layer_name)
         ) from e
 
     layer_class = layer_tuple[0]
@@ -182,6 +204,7 @@ def base_layer_selector(layer_name, **kwargs):
 
     return layer_class(**layer_args)
 
+
 def regularizer_selector(reg_name, **kwargs):
     """Given a regularizer name looks in the `regularizer` dictionary and return an instance.
 
@@ -204,7 +227,8 @@ def regularizer_selector(reg_name, **kwargs):
         reg_tuple = regularizers[reg_name]
     except KeyError:
         raise NotImplementedError(
-            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name))
+            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name)
+        )
 
     reg_class = reg_tuple[0]
     reg_args = reg_tuple[1]
diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
index 7349d6be36..d49089c0e7 100644
--- a/n3fit/src/n3fit/backends/keras_backend/callbacks.py
+++ b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -4,20 +4,64 @@
     The callbacks defined in this module can be passed to the ``callbacks`` argument
     of the ``perform_fit`` method as a list.
 
-    For the most typical usage: ``on_epoch_end``,
+    For the most typical usage: ``on_batch_end``,
     they must take as input an epoch number and a log of the partial losses.
+
+    Note: the terminology used everywhere refers to a single training step as a single epoch.
+    It turns out that to avoid tensorflow overhead, it is beneficial to write a step as a
+    single batch instead. So callbacks must use ``on_batch_end``.
 """
 
 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard
 
 log = logging.getLogger(__name__)
 
 
-class TimerCallback(Callback):
+class CallbackStep(Callback):
+    """
+    Wrapper around the keras Callback that keeps track of how the steps are divided
+    between epochs and batches.
+    The callback will call ``on_step_end`` instead of ``on_batch_end``.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.steps_in_epoch = 0
+        self.epochs_finished = 0
+        self.steps_per_epoch = 0  # will be defined in the first epoch
+        self._previous_logs = {}
+
+    def on_epoch_end(self, epoch, logs=None):
+        if self.steps_per_epoch == 0:
+            self.steps_per_epoch = self.steps_in_epoch
+        self.steps_in_epoch = 0
+        self.epochs_finished += 1
+
+    def on_batch_end(self, batch, logs=None):
+        step_number = self.steps_in_epoch + self.epochs_finished * self.steps_per_epoch
+        self.on_step_end(step_number, logs)
+        self.steps_in_epoch += 1
+
+    def correct_logs(self, logs: dict) -> dict:
+        """
+        The logs that get computed by default are an average over batches.
+        This converts them into the logs for the current step.
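+
+        For example (illustrative numbers): if the average loss reported after the second
+        step of an epoch is 4.0 while the average after the first step alone was 5.0, the
+        loss of the second step by itself is recovered as 2 * 4.0 - 1 * 5.0 = 3.0.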
+ """ + corrected_logs = {} + for k in logs.keys(): + previous_total = self._previous_logs.get(k, 0.0) * self.steps_in_epoch + current_total = logs[k] * (self.steps_in_epoch + 1) + corrected_logs[k] = current_total - previous_total + self._previous_logs = logs + return corrected_logs + + +class TimerCallback(CallbackStep): """Callback to be used during debugging to time the fit""" def __init__(self, count_range=100): @@ -29,8 +73,8 @@ def __init__(self, count_range=100): self.starting_time = None self.last_time = 0 - def on_epoch_end(self, epoch, logs=None): - """ At the end of every epoch it checks the time """ + def on_step_end(self, epoch, logs=None): + """At the end of every epoch it checks the time""" new_time = time() if epoch == 0: # The first epoch is only useful for starting @@ -45,18 +89,18 @@ def on_epoch_end(self, epoch, logs=None): self.last_time = new_time def on_train_end(self, logs=None): - """ Print the results """ + """Print the results""" total_time = time() - self.starting_time n_times = len(self.all_times) # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code # by epoch 100 all parts of the code have usually been called so it's a good compromise - mean = np.mean(self.all_times[min(110, n_times-1):]) - std = np.std(self.all_times[min(110, n_times-1):]) + mean = np.mean(self.all_times[min(110, n_times - 1) :]) + std = np.std(self.all_times[min(110, n_times - 1) :]) log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s") log.info(f"> > > Total time: {total_time/60:.5} min") -class StoppingCallback(Callback): +class StoppingCallback(CallbackStep): """ Given a ``stopping_object``, the callback will monitor the validation chi2 and will stop the training model when the conditions given by ``stopping_object`` @@ -76,10 +120,11 @@ def __init__(self, stopping_object, log_freq=100): self.log_freq = log_freq self.stopping_object = stopping_object - def on_epoch_end(self, epoch, logs=None): - """ Function to be called at the end of every epoch """ + def on_step_end(self, epoch, logs=None): + """Function to be called at the end of every epoch""" print_stats = ((epoch + 1) % self.log_freq) == 0 # Note that the input logs correspond to the fit before the weights are updated + logs = self.correct_logs(logs) self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats) if self.stopping_object.stop_here(): self.model.stop_training = True @@ -92,7 +137,7 @@ def on_train_end(self, logs=None): self.stopping_object.make_stop() -class LagrangeCallback(Callback): +class LagrangeCallback(CallbackStep): """ Updates the given datasets with its respective multipliers each ``update_freq`` epochs @@ -117,7 +162,7 @@ def __init__(self, datasets, multipliers, update_freq=100): self.updateable_weights = [] def on_train_begin(self, logs=None): - """ Save an instance of all relevant layers """ + """Save an instance of all relevant layers""" for layer_name in self.datasets: layer = self.model.get_layer(layer_name) self.updateable_weights.append(layer.weights) @@ -132,8 +177,8 @@ def _update_weights(self): for w in ws: w.assign(w * multiplier) - def on_epoch_end(self, epoch, logs=None): - """ Function to be called at the end of every epoch """ + def on_step_end(self, epoch, logs=None): + """Function to be called at the end of every epoch""" if (epoch + 1) % self.update_freq == 0: self._update_weights() diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py new file mode 100644 index 
0000000000..8640a72c00
--- /dev/null
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -0,0 +1,177 @@
+from typing import List
+
+import tensorflow as tf
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.layers import Dense, Dropout
+
+
+class MultiDense(Dense):
+    """
+    Dense layer for multiple replicas at the same time.
+
+    For all layers after the first, the input to this layer already contains multiple replicas.
+    In this case, the `replica_input` argument should be set to `True`, which is the default.
+    The input shape in this case is (batch_size, replicas, gridsize, features).
+    For the first layer, there are no replicas yet, and so the `replica_input` argument
+    should be set to `False`.
+    The input shape in this case is (batch_size, gridsize, features).
+
+    Weights are initialized using a `replica_seeds` list of seeds, and are identical to the
+    weights of a list of single dense layers with the same `replica_seeds`.
+
+
+    Example
+    -------
+
+    >>> from tensorflow.keras import Sequential
+    >>> from tensorflow.keras.layers import Dense
+    >>> from tensorflow.keras.initializers import GlorotUniform
+    >>> import tensorflow as tf
+    >>> replicas = 2
+    >>> multi_dense_model = Sequential([
+    >>>     MultiDense(units=8, replica_seeds=[42, 43], replica_input=False, kernel_initializer=GlorotUniform(seed=0)),
+    >>>     MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=0)),
+    >>> ])
+    >>> single_models = [
+    >>>     Sequential([
+    >>>         Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
+    >>>         Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r)),
+    >>>     ])
+    >>>     for r in range(replicas)
+    >>> ]
+    >>> gridsize, features = 100, 2
+    >>> multi_dense_model.build(input_shape=(None, gridsize, features))
+    >>> for single_model in single_models:
+    >>>     single_model.build(input_shape=(None, gridsize, features))
+    >>> test_input = tf.random.uniform(shape=(1, gridsize, features))
+    >>> multi_dense_output = multi_dense_model(test_input)
+    >>> single_dense_output = tf.stack([single_model(test_input) for single_model in single_models], axis=1)
+    >>> tf.reduce_all(tf.equal(multi_dense_output, single_dense_output))
+
+    Parameters
+    ----------
+    replica_seeds: List[int]
+        List of seeds per replica for the kernel initializer.
+    kernel_initializer: Initializer
+        Initializer class for the kernel.
+    replica_input: bool (default: True)
+        Whether the input already contains multiple replicas.
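+
+    Note that the forward pass evaluates all replicas in a single einsum call
+    (``brnf,rfg->brng`` when the input carries a replica axis) rather than looping
+    over a list of per-replica Dense layers.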
+ """ + + def __init__( + self, + replica_seeds: List[int], + kernel_initializer: Initializer, + replica_input: bool = True, + **kwargs + ): + super().__init__(**kwargs) + self.replicas = len(replica_seeds) + self.replica_seeds = replica_seeds + self.kernel_initializer = MultiInitializer( + single_initializer=kernel_initializer, replica_seeds=replica_seeds + ) + self.bias_initializer = MultiInitializer( + single_initializer=self.bias_initializer, replica_seeds=replica_seeds + ) + self.replica_input = replica_input + + def build(self, input_shape): + input_dim = input_shape[-1] + self.kernel = self.add_weight( + name="kernel", + shape=(self.replicas, input_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.replicas, 1, self.units), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.input_spec.axes = {-1: input_dim} + self.built = True + + def call(self, inputs): + """ + Compute output of shape (batch_size, replicas, gridsize, units). + + For the first layer, (self.replica_input is False), this is equivalent to + applying each replica separately and concatenating along the last axis. + If the input already contains multiple replica outputs, it is equivalent + to applying each replica to its corresponding input. + """ + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = tf.cast(inputs, dtype=self._compute_dtype_object) + + input_axes = 'brnf' if self.replica_input else 'bnf' + einrule = input_axes + ',rfg->brng' + outputs = tf.einsum(einrule, inputs, self.kernel) + + # Reshape the output back to the original ndim of the input. + if not tf.executing_eagerly(): + output_shape = self.compute_output_shape(inputs.shape.as_list()) + outputs.set_shape(output_shape) + + if self.use_bias: + outputs = outputs + self.bias + + if self.activation is not None: + outputs = self.activation(outputs) + + return outputs + + def compute_output_shape(self, input_shape): + # Remove the replica axis from the input shape. + if self.replica_input: + input_shape = input_shape[:1] + input_shape[2:] + + output_shape = super().compute_output_shape(input_shape) + + # Add back the replica axis to the output shape. + output_shape = output_shape[:1] + [self.replicas] + output_shape[1:] + + return output_shape + + def get_config(self): + config = super().get_config() + config.update({"replica_input": self.replica_input, "replica_seeds": self.replica_seeds}) + return config + + +class MultiInitializer(Initializer): + """ + Multi replica initializer that exactly replicates a stack of single replica initializers. + + Weights are stacked on the first axis, and per replica seeds are added to a base seed of the + given single replica initializer. + + Parameters + ---------- + single_initializer: Initializer + Initializer class for the kernel. + replica_seeds: List[int] + List of seeds per replica for the kernel initializer. + """ + + def __init__(self, single_initializer: Initializer, replica_seeds: List[int]): + self.single_initializer = single_initializer + self.base_seed = single_initializer.seed if hasattr(single_initializer, "seed") else None + self.replica_seeds = replica_seeds + + def __call__(self, shape, dtype=None, **kwargs): + shape = shape[1:] # Remove the replica axis from the shape. 
+ per_replica_weights = [] + for replica_seed in self.replica_seeds: + if self.base_seed is not None: + self.single_initializer.seed = self.base_seed + replica_seed + + per_replica_weights.append(self.single_initializer(shape, dtype, **kwargs)) + + return tf.stack(per_replica_weights, axis=0) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 7b509f0065..d93f3b1d9a 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -108,12 +108,29 @@ def check_initializer(initializer): raise CheckError(f"Initializer {initializer} not accepted by {MetaLayer}") +def check_layer_type_implemented(parameters): + """Checks whether the layer_type is implemented""" + layer_type = parameters.get("layer_type") + implemented_types = ["dense", "dense_per_flavour"] + if layer_type not in implemented_types: + raise CheckError( + f"Layer type {layer_type} not implemented, must be one of {implemented_types}" + ) + + def check_dropout(parameters): """Checks the dropout setup (positive and smaller than 1.0)""" dropout = parameters.get("dropout") if dropout is not None and not 0.0 <= dropout <= 1.0: raise CheckError(f"Dropout must be between 0 and 1, got: {dropout}") + layer_type = parameters.get("layer_type") + if dropout is not None and dropout > 0.0 and layer_type == "dense_per_flavour": + raise CheckError( + "Dropout is not compatible with the dense_per_flavour layer type, " + "please use instead `parameters::layer_type: dense`" + ) + def check_tensorboard(tensorboard): """Check that the tensorbard callback can be enabled correctly""" @@ -168,6 +185,7 @@ def wrapper_check_NN(basis, tensorboard, save, load, parameters): check_consistent_layers(parameters) check_basis_with_layers(basis, parameters) check_stopping(parameters) + check_layer_type_implemented(parameters) check_dropout(parameters) check_lagrange_multipliers(parameters, "integrability") check_lagrange_multipliers(parameters, "positivity") @@ -367,8 +385,8 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica "Replicas cannot be run in parallel with different training/validation " " masks, please set `same_trvl_per_replica` to True in the runcard" ) - if parameters.get("layer_type") != "dense": - raise CheckError("Parallelization has only been tested with layer_type=='dense'") + if parameters.get("layer_type") == "dense_per_flavour": + raise CheckError("Parallelization has not been tested with layer_type=='dense_per_flavour'") @make_argcheck @@ -409,10 +427,9 @@ def check_fiatlux_pdfs_id(replicas, fiatlux): f"Cannot generate a photon replica with id larger than the number of replicas of the PDFs set {luxset.name}:\nreplica id={max_id}, replicas of {luxset.name} = {pdfs_ids}" ) + @make_argcheck def check_multireplica_qed(replicas, fiatlux): if fiatlux is not None: if len(replicas) > 1: - raise CheckError( - "At the moment, running a multireplica QED fits is not allowed." 
- ) + raise CheckError("At the moment, running a multireplica QED fits is not allowed.") diff --git a/n3fit/src/n3fit/layers/msr_normalization.py b/n3fit/src/n3fit/layers/msr_normalization.py index 0755cad0b4..01a3648cb7 100644 --- a/n3fit/src/n3fit/layers/msr_normalization.py +++ b/n3fit/src/n3fit/layers/msr_normalization.py @@ -40,6 +40,7 @@ def __init__(self, mode: str = "ALL", replicas: int = 1, **kwargs): else: raise ValueError(f"Mode {mode} not accepted for sum rules") + self.replicas = replicas indices = [] self.divisor_indices = [] if self._msr_enabled: @@ -83,6 +84,7 @@ def call(self, pdf_integrated, photon_integral): reshape = lambda x: op.transpose(x[0]) y = reshape(pdf_integrated) photon_integral = reshape(photon_integral) + numerators = [] if self._msr_enabled: @@ -96,8 +98,9 @@ def call(self, pdf_integrated, photon_integral): divisors = op.gather(y, self.divisor_indices, axis=0) # Fill in the rest of the flavours with 1 + num_flavours = y.shape[0] norm_constants = op.scatter_to_one( - numerators / divisors, indices=self.indices, output_shape=y.shape + numerators / divisors, indices=self.indices, output_shape=(num_flavours, self.replicas) ) return op.batchit(op.transpose(norm_constants), batch_dimension=1) diff --git a/n3fit/src/n3fit/layers/preprocessing.py b/n3fit/src/n3fit/layers/preprocessing.py index 77ea760607..f8ab1f8f55 100644 --- a/n3fit/src/n3fit/layers/preprocessing.py +++ b/n3fit/src/n3fit/layers/preprocessing.py @@ -33,6 +33,8 @@ class Preprocessing(MetaLayer): Whether large x preprocessing factor should be active seed: int seed for the initializer of the random alpha and beta values + num_replicas: int (default 1) + The number of replicas """ def __init__( @@ -40,6 +42,7 @@ def __init__( flav_info: Optional[list] = None, seed: int = 0, large_x: bool = True, + num_replicas: int = 1, **kwargs, ): if flav_info is None: @@ -49,6 +52,8 @@ def __init__( self.flav_info = flav_info self.seed = seed self.large_x = large_x + self.num_replicas = num_replicas + self.alphas = [] self.betas = [] super().__init__(**kwargs) @@ -87,7 +92,7 @@ def generate_weight(self, name: str, kind: str, dictionary: dict, set_to_zero: b # Generate the new trainable (or not) parameter newpar = self.builder_helper( name=name, - kernel_shape=(1,), + kernel_shape=(self.num_replicas, 1), initializer=initializer, trainable=trainable, constraint=constraint, @@ -117,9 +122,12 @@ def call(self, x): Returns ------- - prefactor: tensor(shape=[1,N,F]) + prefactor: tensor(shape=[1,R,N,F]) """ - alphas = op.stack(self.alphas, axis=1) - betas = op.stack(self.betas, axis=1) + # weight tensors of shape (R, 1, F) + alphas = op.stack(self.alphas, axis=-1) + betas = op.stack(self.betas, axis=-1) + + x = op.batchit(x, batch_dimension=0) return x ** (1 - alphas) * (1 - x) ** betas diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index ae6ed01182..5a69fe9396 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -14,7 +14,16 @@ import numpy as np -from n3fit.backends import Input, Lambda, MetaLayer, MetaModel, base_layer_selector +from n3fit.backends import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + Input, + Lambda, + MetaLayer, + MetaModel, + base_layer_selector, +) from n3fit.backends import operations as op from n3fit.backends import regularizer_selector from n3fit.layers import ( @@ -299,103 +308,6 @@ def observable_generator( return layer_info -# Network generation functions -def generate_dense_network( - nodes_in: int, - nodes: 
int, - activations: List[str], - initializer_name: str = "glorot_normal", - seed: int = 0, - dropout_rate: float = 0.0, - regularizer: str = None, -): - """ - Generates a dense network - - the dropout rate, if selected, is set - for the next to last layer (i.e., the last layer of the dense network before getting to - the output layer for the basis choice) - """ - list_of_pdf_layers = [] - number_of_layers = len(nodes) - if dropout_rate > 0: - dropout_layer = number_of_layers - 2 - else: - dropout_layer = -1 - for i, (nodes_out, activation) in enumerate(zip(nodes, activations)): - # if we have dropout set up, add it to the list - if dropout_rate > 0 and i == dropout_layer: - list_of_pdf_layers.append(base_layer_selector("dropout", rate=dropout_rate)) - - # select the initializer and move the seed - init = MetaLayer.select_initializer(initializer_name, seed=seed + i) - - # set the arguments that will define the layer - arguments = { - "kernel_initializer": init, - "units": int(nodes_out), - "activation": activation, - "input_shape": (nodes_in,), - "kernel_regularizer": regularizer, - } - - layer = base_layer_selector("dense", **arguments) - - list_of_pdf_layers.append(layer) - nodes_in = int(nodes_out) - return list_of_pdf_layers - - -def generate_dense_per_flavour_network( - nodes_in, nodes, activations, initializer_name="glorot_normal", seed=0, basis_size=8 -): - """ - For each flavour generates a dense network of the chosen size - - """ - list_of_pdf_layers = [] - number_of_layers = len(nodes) - current_seed = seed - for i, (nodes_out, activation) in enumerate(zip(nodes, activations)): - initializers = [] - for _ in range(basis_size): - # select the initializer and move the seed - initializers.append(MetaLayer.select_initializer(initializer_name, seed=current_seed)) - current_seed += 1 - - # set the arguments that will define the layer - # but careful, the last layer must be nodes = 1 - # TODO the mismatch is due to the fact that basis_size - # is set to the number of nodes of the last layer when it should - # come from the runcard - if i == number_of_layers - 1: - nodes_out = 1 - arguments = { - "kernel_initializer": initializers, - "units": nodes_out, - "activation": activation, - "input_shape": (nodes_in,), - "basis_size": basis_size, - } - - layer = base_layer_selector("dense_per_flavour", **arguments) - - if i == number_of_layers - 1: - # For the last layer, apply concatenate - concat = base_layer_selector("concatenate") - - def output_layer(ilayer): - result = layer(ilayer) - return concat(result) - - list_of_pdf_layers.append(output_layer) - else: - list_of_pdf_layers.append(layer) - - nodes_in = int(nodes_out) - return list_of_pdf_layers - - def generate_pdf_model( nodes: List[int] = None, activations: List[str] = None, @@ -669,67 +581,50 @@ def pdfNN_layer_generator( else: sumrule_layer = lambda x: x - # Only these layers change from replica to replica: - nn_replicas = [] - preprocessing_factor_replicas = [] - for i_replica, replica_seed in enumerate(seed): - preprocessing_factor_replicas.append( - Preprocessing( - flav_info=flav_info, - input_shape=(1,), - name=f"preprocessing_factor_{i_replica}", - seed=replica_seed + number_of_layers, - large_x=not subtract_one, - ) - ) - nn_replicas.append( - generate_nn( - layer_type=layer_type, - input_dimensions=nn_input_dimensions, - nodes=nodes, - activations=activations, - initializer_name=initializer_name, - replica_seed=replica_seed, - dropout=dropout, - regularizer=regularizer, - regularizer_args=regularizer_args, - 
last_layer_nodes=last_layer_nodes, - name=f"NN_{i_replica}", - ) - ) + compute_preprocessing_factor = Preprocessing( + flav_info=flav_info, + input_shape=(1,), + name=PREPROCESSING_LAYER_ALL_REPLICAS, + seed=seed[0] + number_of_layers, + large_x=not subtract_one, + num_replicas=num_replicas, + ) - # Apply NN layers for all replicas to a given input grid - def neural_network_replicas(x, postfix=""): - NNs_x = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}")( - [nn(x) for nn in nn_replicas] - ) + nn_replicas = generate_nn( + layer_type=layer_type, + nodes_in=nn_input_dimensions, + nodes=nodes, + activations=activations, + initializer_name=initializer_name, + replica_seeds=seed, + dropout=dropout, + regularizer=regularizer, + regularizer_args=regularizer_args, + last_layer_nodes=last_layer_nodes, + ) + + # The NN subtracted by NN(1), if applicable + def nn_subtracted(x): + NNs_x = nn_replicas(x) if subtract_one: x_eq_1_processed = process_input(layer_x_eq_1) - NNs_x_1 = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}_x_1")( - [nn(x_eq_1_processed) for nn in nn_replicas] - ) + NNs_x_1 = nn_replicas(x_eq_1_processed) NNs_x = subtract_one_layer([NNs_x, NNs_x_1]) return NNs_x - # Apply preprocessing factors for all replicas to a given input grid - def preprocessing_replicas(x, postfix=""): - return Lambda(lambda pfs: op.stack(pfs, axis=1), name=f"prefactors{postfix}")( - [pf(x) for pf in preprocessing_factor_replicas] - ) - - def compute_unnormalized_pdf(x, postfix=""): + def compute_unnormalized_pdf(x): # Preprocess the input grid x_nn_input = extract_nn_input(x) x_processed = process_input(x_nn_input) x_original = extract_original(x) # Compute the neural network output - NNs_x = neural_network_replicas(x_processed, postfix=postfix) + NNs_x = nn_subtracted(x_processed) # Compute the preprocessing factor - preprocessing_factors_x = preprocessing_replicas(x_original, postfix=postfix) + preprocessing_factors_x = compute_preprocessing_factor(x_original) # Apply the preprocessing factor pref_NNs_x = apply_preprocessing_factor([preprocessing_factors_x, NNs_x]) @@ -746,7 +641,7 @@ def compute_unnormalized_pdf(x, postfix=""): PDFs_unnormalized = compute_unnormalized_pdf(pdf_input) if impose_sumrule: - PDFs_integration_grid = compute_unnormalized_pdf(integrator_input, postfix="_x_integ") + PDFs_integration_grid = compute_unnormalized_pdf(integrator_input) if photons: # add batch and flavor dimensions @@ -770,54 +665,150 @@ def compute_unnormalized_pdf(x, postfix=""): if photons: PDFs = layer_photon(PDFs) - if replica_axis: - pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) - else: - pdf_model = MetaModel(model_input, PDFs[:, 0], name=f"PDFs", scaler=scaler) + if not replica_axis: + PDFs = Lambda(lambda pdfs: pdfs[:, 0], name="remove_replica_axis")(PDFs) + pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) return pdf_model def generate_nn( layer_type: str, - input_dimensions: int, + nodes_in: int, nodes: List[int], activations: List[str], initializer_name: str, - replica_seed: int, + replica_seeds: List[int], dropout: float, regularizer: str, regularizer_args: dict, last_layer_nodes: int, - name: str, ) -> MetaModel: """ Create the part of the model that contains all of the actual neural network - layers. + layers, for each replica. + + Parameters + ---------- + layer_type: str + Type of layer to use. Can be "dense" or "dense_per_flavour". + nodes_in: int + Number of nodes in the input layer. 
+ nodes: List[int] + Number of nodes in each hidden layer. + activations: List[str] + Activation function to use in each hidden layer. + initializer_name: str + Name of the initializer to use. + replica_seeds: List[int] + List of seeds to use for each replica. + dropout: float + Dropout rate to use (if 0, no dropout is used). + regularizer: str + Name of the regularizer to use. + regularizer_args: dict + Arguments to pass to the regularizer. + last_layer_nodes: int + Number of nodes in the last layer. + + Returns + ------- + nn_replicas: MetaModel + Single model containing all replicas. """ - common_args = { - 'nodes_in': input_dimensions, - 'nodes': nodes, - 'activations': activations, - 'initializer_name': initializer_name, - 'seed': replica_seed, - } - if layer_type == "dense": + nodes_list = list(nodes) # so we can modify it + x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed') + + custom_args = {} + if layer_type == "dense_per_flavour": + # set the arguments that will define the layer + # but careful, the last layer must be nodes = 1 + # TODO the mismatch is due to the fact that basis_size + # is set to the number of nodes of the last layer when it should + # come from the runcard + nodes_list[-1] = 1 + basis_size = last_layer_nodes + custom_args['basis_size'] = basis_size + + def initializer_generator(seed, i_layer): + seed += i_layer * basis_size + initializers = [ + MetaLayer.select_initializer(initializer_name, seed=seed + b) + for b in range(basis_size) + ] + return initializers + + else: # "dense" reg = regularizer_selector(regularizer, **regularizer_args) - list_of_pdf_layers = generate_dense_network( - **common_args, dropout_rate=dropout, regularizer=reg - ) - elif layer_type == "dense_per_flavour": - list_of_pdf_layers = generate_dense_per_flavour_network( - **common_args, basis_size=last_layer_nodes - ) + custom_args['regularizer'] = reg + + def initializer_generator(seed, i_layer): + seed += i_layer + return MetaLayer.select_initializer(initializer_name, seed=seed) + + # First create all the layers... 
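+    # For "dense", a single MultiDense layer per depth holds the weights of all
+    # replicas at once; for "dense_per_flavour", a separate layer is built for each
+    # replica at every depth.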
+    # list_of_pdf_layers[d][r] is the layer at depth d for replica r
+    list_of_pdf_layers = []
+    for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
+        if layer_type == "dense":
+            layers = base_layer_selector(
+                layer_type,
+                replica_seeds=replica_seeds,
+                kernel_initializer=initializer_generator(0, i_layer),
+                units=nodes_out,
+                activation=activation,
+                replica_input=(i_layer != 0),
+                **custom_args,
+            )
+        else:
+            layers = [
+                base_layer_selector(
+                    layer_type,
+                    kernel_initializer=initializer_generator(replica_seed, i_layer),
+                    units=nodes_out,
+                    activation=activation,
+                    input_shape=(nodes_in,),
+                    **custom_args,
+                )
+                for replica_seed in replica_seeds
+            ]
+        list_of_pdf_layers.append(layers)
+        nodes_in = int(nodes_out)
+
+    # add dropout as second to last layer
+    if dropout > 0:
+        dropout_layer = base_layer_selector("dropout", rate=dropout)
+        list_of_pdf_layers.insert(-2, dropout_layer)
+
+    # In case of per flavour network, concatenate at the last layer
+    # (bind ``layer`` as a default argument: a bare closure in the comprehension
+    # would late-bind and leave every entry pointing to the last layer)
+    if layer_type == "dense_per_flavour":
+        concat = base_layer_selector("concatenate")
+        list_of_pdf_layers[-1] = [
+            lambda x, layer=layer: concat(layer(x)) for layer in list_of_pdf_layers[-1]
+        ]
+
+    # Apply all layers to the input to create the models
+    if layer_type == "dense":
+        pdfs = x_input
+        for layer in list_of_pdf_layers:
+            pdfs = layer(pdfs)
+        model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS)
+
+        return model
+
+    pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
+
+    for layers in list_of_pdf_layers[1:]:
+        # Since some layers (dropout) are shared, we have to treat them separately
+        if type(layers) is list:
+            pdfs = [layer(x) for layer, x in zip(layers, pdfs)]
+        else:
+            pdfs = [layers(x) for x in pdfs]
-    # Note: using a Sequential model would be more appropriate, but it would require
-    # creating a MetaSequential model.
- x = Input(shape=(None, input_dimensions), batch_size=1, name='xgrids_processed') - pdf = x - for layer in list_of_pdf_layers: - pdf = layer(pdf) + # Wrap the pdfs in a MetaModel to enable getting/setting of weights later + pdfs = [ + MetaModel({'NN_input': x_input}, pdf, name=f"{NN_PREFIX}_{i_replica}")(x_input) + for i_replica, pdf in enumerate(pdfs) + ] + pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name=f"stack_replicas")(pdfs) + model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS) - model = MetaModel({'NN_input': x}, pdf, name=name) return model diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 99e9f013db..acbcc8b3cd 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -17,6 +17,7 @@ from n3fit import model_gen from n3fit.backends import MetaModel, callbacks, clear_backend_state from n3fit.backends import operations as op +from n3fit.backends import NN_LAYER_ALL_REPLICAS import n3fit.hyper_optimization.penalties import n3fit.hyper_optimization.rewards from n3fit.scaler import generate_scaler @@ -454,7 +455,7 @@ def _model_generation(self, xinput, pdf_model, partition, partition_idx): training.summary() pdf_model = training.get_layer("PDFs") pdf_model.summary() - nn_model = pdf_model.get_layer("NN_0") + nn_model = pdf_model.get_layer(NN_LAYER_ALL_REPLICAS) nn_model.summary() # We may have fits without sumrules imposed try: diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5 index 2c5b02e71e..9ffdaea7f3 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ diff --git a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5 index e5a8adea86..18c593da9e 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ diff --git a/n3fit/src/n3fit/tests/test_modelgen.py b/n3fit/src/n3fit/tests/test_modelgen.py index 4cec4574ee..c87523f838 100644 --- a/n3fit/src/n3fit/tests/test_modelgen.py +++ b/n3fit/src/n3fit/tests/test_modelgen.py @@ -5,62 +5,50 @@ It checks that both the number of layers and the shape of the weights of the layers are what is expected """ -import numpy as np -import n3fit.model_gen -from n3fit.backends import MetaModel -from n3fit.backends import operations as op +from n3fit.backends import NN_PREFIX +from n3fit.model_gen import generate_nn INSIZE = 16 OUT_SIZES = (4, 3) BASIS_SIZE = 3 +COMMON_ARGS = { + "nodes_in": INSIZE, + "nodes": OUT_SIZES, + "activations": ["sigmoid", "tanh"], + "initializer_name": "glorot_uniform", + "replica_seeds": [0], + "dropout": 0.0, + "regularizer": None, + "regularizer_args": {}, + "last_layer_nodes": BASIS_SIZE, +} + def test_generate_dense_network(): - nodes_in = INSIZE - nodes_out = OUT_SIZES - activations = ["sigmoid", "tanh"] - layers = n3fit.model_gen.generate_dense_network(nodes_in, nodes_out, activations) - arr = np.random.rand(1, INSIZE) - input_layer = op.numpy_to_input(arr) - curr_layer = input_layer - for layer in layers: - curr_layer = layer(curr_layer) - modelito = MetaModel({"input": input_layer}, curr_layer) + nn = generate_nn("dense", **COMMON_ARGS) + # The number of layers should be input layer + len(OUT_SIZES) - assert len(modelito.layers) == len(OUT_SIZES) + 1 + assert len(nn.layers) == len(OUT_SIZES) + 1 # Check that the number of parameters is as expected - # We expect 4 weights where the 
two first ones are - # (INSIZE, OUT_SIZE[0]) (OUT_SIZE[0],) - # and the second one - # (OUT_SIZE[0], OUT_SIZE[1]) (OUT_SIZE[1],) expected_sizes = [ - (INSIZE, OUT_SIZES[0]), - (OUT_SIZES[0],), - OUT_SIZES, - (OUT_SIZES[1],), + (1, INSIZE, OUT_SIZES[0]), + (1, 1, OUT_SIZES[0]), + (1, *OUT_SIZES), + (1, 1, OUT_SIZES[1]), ] - for weight, esize in zip(modelito.weights, expected_sizes): + for weight, esize in zip(nn.weights, expected_sizes): assert weight.shape == esize def test_generate_dense_per_flavour_network(): - nodes_in = INSIZE - nodes_out = OUT_SIZES - activations = ["sigmoid", "tanh"] - layers = n3fit.model_gen.generate_dense_per_flavour_network( - nodes_in, nodes_out, activations, basis_size=BASIS_SIZE - ) - arr = np.random.rand(1, INSIZE) - input_layer = op.numpy_to_input(arr) - curr_layer = input_layer - for layer in layers: - curr_layer = layer(curr_layer) - modelito = MetaModel({"input": input_layer}, curr_layer) + nn = generate_nn("dense_per_flavour", **COMMON_ARGS).get_layer(f"{NN_PREFIX}_0") + # The number of layers should be input + BASIS_SIZE*len(OUT_SIZES) + concatenate - assert len(modelito.layers) == BASIS_SIZE * len(OUT_SIZES) + 2 + assert len(nn.layers) == BASIS_SIZE * len(OUT_SIZES) + 2 # The shape for this network of denses for flavours will depend on the basis_size expected_sizes = [] expected_sizes += BASIS_SIZE * [(INSIZE, OUT_SIZES[0]), (OUT_SIZES[0],)] expected_sizes += BASIS_SIZE * [(OUT_SIZES[0], 1), (1,)] - for weight, esize in zip(modelito.weights, expected_sizes): + for weight, esize in zip(nn.weights, expected_sizes): assert weight.shape == esize diff --git a/n3fit/src/n3fit/tests/test_multidense.py b/n3fit/src/n3fit/tests/test_multidense.py new file mode 100644 index 0000000000..b912c0bd82 --- /dev/null +++ b/n3fit/src/n3fit/tests/test_multidense.py @@ -0,0 +1,68 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import Sequential +from tensorflow.keras.initializers import GlorotUniform +from tensorflow.keras.layers import Dense + +from n3fit.backends.keras_backend.multi_dense import MultiDense +from n3fit.model_gen import generate_nn + + +def test_multidense(): + replicas = 2 + multi_dense_model = Sequential( + [ + MultiDense( + units=8, + replica_seeds=[42, 43], + replica_input=False, + kernel_initializer=GlorotUniform(seed=0), + ), + MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=100)), + ] + ) + single_models = [ + Sequential( + [ + Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)), + Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)), + ] + ) + for r in range(replicas) + ] + + gridsize, features = 100, 3 + multi_dense_model.build(input_shape=(None, gridsize, features)) + for single_model in single_models: + single_model.build(input_shape=(None, gridsize, features)) + + test_input = tf.random.uniform(shape=(1, gridsize, features)) + multi_dense_output = multi_dense_model(test_input) + single_dense_output = tf.stack( + [single_model(test_input) for single_model in single_models], axis=1 + ) + + np.testing.assert_allclose(multi_dense_output, single_dense_output, atol=1e-6, rtol=1e-4) + + +def test_initializers(): + input_shape = (None, 3, 1) + dense_layers = [] + for r in range(2): + dense_layer = Dense(units=2, kernel_initializer=GlorotUniform(seed=42 + r)) + dense_layer.build(input_shape=input_shape) + dense_layers.append(dense_layer) + stacked_weights = tf.stack([dense_layer.weights[0] for dense_layer in dense_layers], axis=0) + + multi_dense_layer = MultiDense( + 
units=2, + replica_seeds=[0, 1], + replica_input=False, + kernel_initializer=GlorotUniform(seed=42), + ) + multi_dense_layer.build(input_shape=input_shape) + + multi_dense_weights = multi_dense_layer.weights[0].numpy() + stacked_weights = stacked_weights.numpy() + + np.testing.assert_allclose(multi_dense_weights, stacked_weights) diff --git a/n3fit/src/n3fit/tests/test_preprocessing.py b/n3fit/src/n3fit/tests/test_preprocessing.py index 3bf6a8966c..0280011343 100644 --- a/n3fit/src/n3fit/tests/test_preprocessing.py +++ b/n3fit/src/n3fit/tests/test_preprocessing.py @@ -22,45 +22,47 @@ def test_preprocessing(): test_prefactors = [ [ [ - 3.7446213e-01, - 1.9785003e-01, - 2.7931085e-01, - 2.0784079e-01, - 4.5369801e-01, - 2.7796263e-01, - 5.4610312e-01, - 2.4907256e-02, - ], - [ - 6.2252983e-04, - 3.0504008e-05, - 4.5713778e-03, - 1.0905267e-03, - 4.0506415e-02, - 5.9004971e-05, - 4.5114113e-03, - 2.6757403e-09, - ], - [ - 4.1631009e-02, - 1.0586979e-02, - 8.3202787e-02, - 4.3506064e-02, - 2.2559988e-01, - 1.5161950e-02, - 1.0105091e-01, - 1.4808348e-04, - ], - [ - 1.1616933e-01, - 4.2717375e-02, - 1.5620175e-01, - 9.7478621e-02, - 3.2600221e-01, - 5.8901049e-02, - 2.1937098e-01, - 1.8343410e-03, - ], + [ + 3.7446213e-01, + 1.9785003e-01, + 2.7931085e-01, + 2.0784079e-01, + 4.5369801e-01, + 2.7796263e-01, + 5.4610312e-01, + 2.4907256e-02, + ], + [ + 6.2252983e-04, + 3.0504008e-05, + 4.5713778e-03, + 1.0905267e-03, + 4.0506415e-02, + 5.9004971e-05, + 4.5114113e-03, + 2.6757403e-09, + ], + [ + 4.1631009e-02, + 1.0586979e-02, + 8.3202787e-02, + 4.3506064e-02, + 2.2559988e-01, + 1.5161950e-02, + 1.0105091e-01, + 1.4808348e-04, + ], + [ + 1.1616933e-01, + 4.2717375e-02, + 1.5620175e-01, + 9.7478621e-02, + 3.2600221e-01, + 5.8901049e-02, + 2.1937098e-01, + 1.8343410e-03, + ], + ] ] ] prefactors = prepro(test_x) diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index 7e2e754e0f..12e5aca374 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -24,6 +24,7 @@ import numpy as np import numpy.linalg as la +from n3fit.backends import PREPROCESSING_LAYER_ALL_REPLICAS from validphys.arclength import arc_lengths, integrability_number from validphys.core import PDF, MCStats from validphys.lhapdfset import LHAPDFSet @@ -224,13 +225,7 @@ def get_preprocessing_factors(self, replica=None): if replica is None: replica = 1 # Replicas start counting in 1 so: - preprocessing_layers = self._models[replica - 1].get_layer_re(r"preprocessing_factor_\d") - if len(preprocessing_layers) > 1: - # We really don't want to fail at this point, but print a warning at least... - log.warning("More than one preprocessing layer found within the model!") - elif len(preprocessing_layers) < 1: - log.warning("No preprocessing layer found within the model!") - preprocessing_layer = preprocessing_layers[0] + preprocessing_layer = self._models[replica - 1].get_layer(PREPROCESSING_LAYER_ALL_REPLICAS) alphas_and_betas = None if self.fit_basis is not None: