diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5 index 542fca06f9..2749393b42 100644 Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py index f49bbd0f53..3c48317a86 100644 --- a/n3fit/src/n3fit/backends/__init__.py +++ b/n3fit/src/n3fit/backends/__init__.py @@ -1,20 +1,23 @@ -from n3fit.backends.keras_backend.internal_state import ( - set_initial_state, - clear_backend_state, - set_eager -) +from n3fit.backends.keras_backend import callbacks, constraints, operations from n3fit.backends.keras_backend.MetaLayer import MetaLayer -from n3fit.backends.keras_backend.MetaModel import MetaModel +from n3fit.backends.keras_backend.MetaModel import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + MetaModel, +) from n3fit.backends.keras_backend.base_layers import ( + Concatenate, Input, - concatenate, Lambda, base_layer_selector, + concatenate, regularizer_selector, - Concatenate, ) -from n3fit.backends.keras_backend import operations -from n3fit.backends.keras_backend import constraints -from n3fit.backends.keras_backend import callbacks +from n3fit.backends.keras_backend.internal_state import ( + clear_backend_state, + set_eager, + set_initial_state, +) print("Using Keras backend") diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 1b0990bb03..48b4e7e3aa 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -5,6 +5,7 @@ backend-dependent calls. """ +import logging import re import h5py @@ -16,6 +17,8 @@ import n3fit.backends.keras_backend.operations as op +log = logging.getLogger(__name__) + # Check the TF version to check if legacy-mode is needed (TF < 2.2) tf_version = tf.__version__.split(".") if int(tf_version[0]) == 2 and int(tf_version[1]) < 2: @@ -46,7 +49,8 @@ } NN_PREFIX = "NN" -PREPROCESSING_PREFIX = "preprocessing_factor" +NN_LAYER_ALL_REPLICAS = "all_NNs" +PREPROCESSING_LAYER_ALL_REPLICAS = "preprocessing_factor" # Some keys need to work for everyone for k, v in optimizers.items(): @@ -156,7 +160,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): of the model (the loss functions) to the partial losses. If the model was compiled with input and output data, they will not be passed through. 
-            In this case by default the number of `epochs` will be set to 1
+            In this case by default the number of ``epochs`` will be set to 1
 
             ex: {'loss': [100], 'dataset_a_loss1' : [67], 'dataset_2_loss': [33]}
@@ -169,10 +173,36 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs):
         x_params = self._parse_input(x)
         if y is None:
             y = self.target_tensors
-        history = super().fit(x=x_params, y=y, epochs=epochs, **kwargs)
+
+        # Avoids Tensorflow overhead that happens at every epoch by putting multiple steps in an epoch
+        steps_per_epoch = self.determine_steps_per_epoch(epochs)
+
+        for k, v in x_params.items():
+            x_params[k] = tf.repeat(v, steps_per_epoch, axis=0)
+        y = [tf.repeat(yi, steps_per_epoch, axis=0) for yi in y]
+
+        history = super().fit(
+            x=x_params, y=y, epochs=epochs // steps_per_epoch, batch_size=1, **kwargs
+        )
         loss_dict = history.history
         return loss_dict
 
+    def determine_steps_per_epoch(self, epochs):
+        num_replicas = self.output_shape[0][0]
+        # in this case we're most likely running on the CPU and this is not worth it
+        if num_replicas == 1:
+            return 1
+
+        # On the GPU, pack as many steps as possible into each epoch (up to 100),
+        # while keeping the requested number of epochs divisible by the number of steps
+        for divisor in [10, 100]:
+            if epochs % divisor != 0:
+                steps_per_epoch = divisor // 10
+                log.warning(
+                    f"Epochs {epochs} not divisible by {divisor}, using {steps_per_epoch} steps per epoch"
+                )
+                return steps_per_epoch
+        return 100
+
     def predict(self, x=None, **kwargs):
         """Call super().predict with the right input arguments"""
         x = self._parse_input(x)
@@ -198,10 +228,15 @@ def compute_losses(self):
         out_names = [f"{i}_loss" for i in self.output_names]
         out_names.insert(0, "loss")
 
+        inputs = self._parse_input(None)
+        # get rid of the repetitions by number of epochs made in perform_fit
+        for k, v in inputs.items():
+            inputs[k] = v[:1]
+
         # Compile an evaluation function
         @tf.function
         def losses_fun():
-            predictions = self(self._parse_input(None))
+            predictions = self(inputs)
             # If we only have one dataset the output changes
             if len(out_names) == 2:
                 predictions = [predictions]
@@ -228,7 +263,7 @@ def compile(
     ):
         """
         Compile the model given an optimizer and a list of loss functions.
-        The optimizer must be one of those implemented in the `optimizer` attribute of this class.
+        The optimizer must be one of those implemented in the ``optimizer`` attribute of this class.
 
         Options:
             - A learning rate and a list of target output can be defined.
@@ -353,14 +388,10 @@ def get_replica_weights(self, i_replica):
             dict
                 dictionary with the weights of the replica
         """
-        NN_weights = [
-            tf.Variable(w, name=w.name) for w in self.get_layer(f"{NN_PREFIX}_{i_replica}").weights
-        ]
-        prepro_weights = [
-            tf.Variable(w, name=w.name)
-            for w in self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").weights
-        ]
-        weights = {NN_PREFIX: NN_weights, PREPROCESSING_PREFIX: prepro_weights}
+        weights = {}
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            weights[layer_type] = get_layer_replica_weights(layer, i_replica)
 
         return weights
 
@@ -378,10 +409,9 @@ def set_replica_weights(self, weights, i_replica=0):
         i_replica: int
             the replica number to set, defaulting to 0
         """
-        self.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights[NN_PREFIX])
-        self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").set_weights(
-            weights[PREPROCESSING_PREFIX]
-        )
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            set_layer_replica_weights(layer=layer, weights=weights[layer_type], i_replica=i_replica)
 
     def split_replicas(self):
         """
@@ -411,51 +441,84 @@ def load_identical_replicas(self, model_file):
         """
         From a single replica model, load the same weights into all replicas.
         """
-        weights = self._format_weights_from_file(model_file)
+        single_replica = self.single_replica_generator()
+        single_replica.load_weights(model_file)
+        weights = single_replica.get_replica_weights(0)
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)
 
-    def _format_weights_from_file(self, model_file):
-        """Read weights from a .h5 file and format into a dictionary of tf.Variables"""
-        weights = {}
-        with h5py.File(model_file, 'r') as f:
-            # look at layers of the form NN_i and take the lowest i
-            i_replica = 0
-            while f"{NN_PREFIX}_{i_replica}" not in f:
-                i_replica += 1
+def is_stacked_single_replicas(layer):
+    """
+    Check if the layer consists of stacked single replicas (only happens for NN layers),
+    to determine how to extract single replica weights.
 
-            weights[NN_PREFIX] = self._extract_weights(
-                f[f"{NN_PREFIX}_{i_replica}"], NN_PREFIX, i_replica
-            )
-            weights[PREPROCESSING_PREFIX] = self._extract_weights(
-                f[f"{PREPROCESSING_PREFIX}_{i_replica}"], PREPROCESSING_PREFIX, i_replica
-            )
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to check
 
-        return weights
+    Returns
+    -------
+    bool
+        True if the layer consists of stacked single replicas
+    """
+    if not isinstance(layer, MetaModel):
+        return False
+    return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers]
 
-    def _extract_weights(self, h5_group, weights_key, i_replica):
-        """Extract weights from a h5py group, turning them into Tensorflow variables"""
-        weights = []
 
-        def append_weights(name, node):
-            if isinstance(node, h5py.Dataset):
-                weight_name = node.name.split("/", 2)[-1]
-                weight_name = weight_name.replace(f"{NN_PREFIX}_{i_replica}", f"{NN_PREFIX}_0")
-                weight_name = weight_name.replace(
-                    f"{PREPROCESSING_PREFIX}_{i_replica}", f"{PREPROCESSING_PREFIX}_0"
-                )
-                weights.append(tf.Variable(node[()], name=weight_name))
 
+def get_layer_replica_weights(layer, i_replica: int):
+    """
+    Get the weights for the given single replica ``i_replica``,
+    from a ``layer`` that has weights for all replicas.
+
+    Note that the layer could be a complete NN with many separate sub-layers,
+    each of which contains weights for all replicas together.
+    This function separates the per-replica weights and returns the list of weights as if the
+    input ``layer`` were made of _only_ replica ``i_replica``.
+
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to get the weights from
+    i_replica: int
+        the replica number
+
+    Returns
+    -------
+    weights: list
+        list of weights for the replica
+    """
+    if is_stacked_single_replicas(layer):
+        weights = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
+    else:
+        weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights]
 
-        h5_group.visititems(append_weights)
+    return weights
+
+
+def set_layer_replica_weights(layer, weights, i_replica: int):
+    """
+    Set the weights for the given single replica ``i_replica``.
+    When the input ``layer`` contains weights for many replicas, ensures that
+    only those corresponding to replica ``i_replica`` are updated.
+
+    Parameters
+    ----------
+    layer: MetaLayer
+        the layer to set the weights for
+    weights: list
+        list of weights for the replica
+    i_replica: int
+        the replica number
+    """
+    if is_stacked_single_replicas(layer):
+        layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights)
+        return
 
-        # have to put them in the same order
-        weights_ordered = []
-        weights_model_order = [w.name for w in self.get_replica_weights(0)[weights_key]]
-        for w in weights_model_order:
-            for w_h5 in weights:
-                if w_h5.name == w:
-                    weights_ordered.append(w_h5)
+    full_weights = [w.numpy() for w in layer.weights]
+    for w_old, w_new in zip(full_weights, weights):
+        w_old[i_replica : i_replica + 1] = w_new
 
-        return weights_ordered
+    layer.set_weights(full_weights)
diff --git a/n3fit/src/n3fit/backends/keras_backend/base_layers.py b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
index 14215e9cf6..36a2016fee 100644
--- a/n3fit/src/n3fit/backends/keras_backend/base_layers.py
+++ b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
@@ -17,34 +17,44 @@ The names of the layer and the activation function are the ones to be used in the n3fit runcard.
""" -from tensorflow.keras.layers import Lambda, LSTM, Dropout, Concatenate -from tensorflow.keras.layers import concatenate, Input # pylint: disable=unused-import +from tensorflow import expand_dims, math, nn +from tensorflow.keras.layers import ( # pylint: disable=unused-import + Dropout, + Input, + Lambda, + concatenate, +) from tensorflow.keras.layers import Dense as KerasDense -from tensorflow import expand_dims +from tensorflow.keras.layers import LSTM, Concatenate # pylint: disable=unused-import from tensorflow.keras.regularizers import l1_l2 -from tensorflow import nn, math from n3fit.backends import MetaLayer +from n3fit.backends.keras_backend.multi_dense import MultiDense + # Custom activation functions def square_activation(x): - """ Squares the input """ - return x*x + """Squares the input""" + return x * x + def modified_tanh(x): - """ A non-saturating version of the tanh function """ - return math.abs(x)*nn.tanh(x) + """A non-saturating version of the tanh function""" + return math.abs(x) * nn.tanh(x) + def leaky_relu(x): - """ Computes the Leaky ReLU activation function """ + """Computes the Leaky ReLU activation function""" return nn.leaky_relu(x, alpha=0.2) + custom_activations = { - "square" : square_activation, + "square": square_activation, "leaky_relu": leaky_relu, "modified_tanh": modified_tanh, } + def LSTM_modified(**kwargs): """ LSTM asks for a sample X timestep X features kind of thing so we need to reshape the input @@ -61,9 +71,11 @@ def ReshapedLSTM(input_tensor): return ReshapedLSTM + class Dense(KerasDense, MetaLayer): pass + def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_kwargs): """ Generates a list of layers which can take as an input either one single layer @@ -85,7 +97,7 @@ def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_ # Need to generate a list of dense layers dense_basis = [ - base_layer_selector("dense", kernel_initializer=initializer, **dense_kwargs) + base_layer_selector("single_dense", kernel_initializer=initializer, **dense_kwargs) for initializer in kernel_initializer ] @@ -116,13 +128,26 @@ def apply_dense(xinput): layers = { "dense": ( + MultiDense, + { + "input_shape": (1,), + "replica_seeds": None, + "kernel_initializer": "glorot_normal", + "units": 5, + "activation": "sigmoid", + "kernel_regularizer": None, + "replica_input": True, + }, + ), + # This one is only used inside dense_per_flavour + "single_dense": ( Dense, { "input_shape": (1,), "kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid", - "kernel_regularizer": None + "kernel_regularizer": None, }, ), "dense_per_flavour": ( @@ -143,31 +168,28 @@ def apply_dense(xinput): "concatenate": (Concatenate, {}), } -regularizers = { - 'l1_l2': (l1_l2, {'l1': 0., 'l2': 0.}) - } +regularizers = {'l1_l2': (l1_l2, {'l1': 0.0, 'l2': 0.0})} + def base_layer_selector(layer_name, **kwargs): """ - Given a layer name, looks for it in the `layers` dictionary and returns an instance. + Given a layer name, looks for it in the `layers` dictionary and returns an instance. 
-    The layer dictionary defines a number of defaults
-    but they can be overwritten/enhanced through kwargs
+    The layer dictionary defines a number of defaults
+    but they can be overwritten/enhanced through kwargs
 
-    Parameters
-    ----------
-        `layer_name
-            str with the name of the layer
-        `**kwargs`
-            extra optional arguments to pass to the layer (beyond their defaults)
+    Parameters
+    ----------
+    `layer_name`
+        str with the name of the layer
+    `**kwargs`
+        extra optional arguments to pass to the layer (beyond their defaults)
     """
     try:
         layer_tuple = layers[layer_name]
     except KeyError as e:
         raise NotImplementedError(
-            "Layer not implemented in keras_backend/base_layers.py: {0}".format(
-                layer_name
-            )
+            "Layer not implemented in keras_backend/base_layers.py: {0}".format(layer_name)
         ) from e
 
     layer_class = layer_tuple[0]
@@ -182,6 +204,7 @@ def base_layer_selector(layer_name, **kwargs):
 
     return layer_class(**layer_args)
 
+
 def regularizer_selector(reg_name, **kwargs):
     """Given a regularizer name looks in the `regularizer` dictionary and return an instance.
 
@@ -204,7 +227,8 @@ def regularizer_selector(reg_name, **kwargs):
         reg_tuple = regularizers[reg_name]
     except KeyError:
         raise NotImplementedError(
-            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name))
+            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name)
+        )
 
     reg_class = reg_tuple[0]
     reg_args = reg_tuple[1]
diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
index 7349d6be36..d49089c0e7 100644
--- a/n3fit/src/n3fit/backends/keras_backend/callbacks.py
+++ b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -4,20 +4,64 @@
     The callbacks defined in this module can be passed to the ``callbacks`` argument
     of the ``perform_fit`` method as a list.
 
-    For the most typical usage: ``on_epoch_end``,
+    For the most typical usage: ``on_batch_end``,
     they must take as input an epoch number and a log of the partial losses.
+
+    Note: the terminology used everywhere refers to a single training step as a single epoch.
+    It turns out that to avoid tensorflow overhead, it is beneficial to write a step as a
+    single batch instead. So callbacks must use ``on_batch_end``.
 """
 
 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard
 
 log = logging.getLogger(__name__)
 
 
-class TimerCallback(Callback):
+class CallbackStep(Callback):
+    """
+    Wrapper around the keras Callback that keeps track of how the steps are divided
+    between epochs and batches.
+    The callback will call ``on_step_end`` instead of ``on_batch_end``.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.steps_in_epoch = 0
+        self.epochs_finished = 0
+        self.steps_per_epoch = 0  # will be defined in the first epoch
+        self._previous_logs = {}
+
+    def on_epoch_end(self, epoch, logs=None):
+        if self.steps_per_epoch == 0:
+            self.steps_per_epoch = self.steps_in_epoch
+        self.steps_in_epoch = 0
+        self.epochs_finished += 1
+
+    def on_batch_end(self, batch, logs=None):
+        step_number = self.steps_in_epoch + self.epochs_finished * self.steps_per_epoch
+        self.on_step_end(step_number, logs)
+        self.steps_in_epoch += 1
+
+    def correct_logs(self, logs: dict) -> dict:
+        """
+        The logs that get computed by default are an average over batches.
+        This converts them into the logs for the current step.
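+
+        For example (illustrative numbers): if the average loss reported after the second
+        step of an epoch is 4.0 while the average after the first step alone was 5.0, the
+        loss of the second step by itself is recovered as 2 * 4.0 - 1 * 5.0 = 3.0.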
+ """ + corrected_logs = {} + for k in logs.keys(): + previous_total = self._previous_logs.get(k, 0.0) * self.steps_in_epoch + current_total = logs[k] * (self.steps_in_epoch + 1) + corrected_logs[k] = current_total - previous_total + self._previous_logs = logs + return corrected_logs + + +class TimerCallback(CallbackStep): """Callback to be used during debugging to time the fit""" def __init__(self, count_range=100): @@ -29,8 +73,8 @@ def __init__(self, count_range=100): self.starting_time = None self.last_time = 0 - def on_epoch_end(self, epoch, logs=None): - """ At the end of every epoch it checks the time """ + def on_step_end(self, epoch, logs=None): + """At the end of every epoch it checks the time""" new_time = time() if epoch == 0: # The first epoch is only useful for starting @@ -45,18 +89,18 @@ def on_epoch_end(self, epoch, logs=None): self.last_time = new_time def on_train_end(self, logs=None): - """ Print the results """ + """Print the results""" total_time = time() - self.starting_time n_times = len(self.all_times) # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code # by epoch 100 all parts of the code have usually been called so it's a good compromise - mean = np.mean(self.all_times[min(110, n_times-1):]) - std = np.std(self.all_times[min(110, n_times-1):]) + mean = np.mean(self.all_times[min(110, n_times - 1) :]) + std = np.std(self.all_times[min(110, n_times - 1) :]) log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s") log.info(f"> > > Total time: {total_time/60:.5} min") -class StoppingCallback(Callback): +class StoppingCallback(CallbackStep): """ Given a ``stopping_object``, the callback will monitor the validation chi2 and will stop the training model when the conditions given by ``stopping_object`` @@ -76,10 +120,11 @@ def __init__(self, stopping_object, log_freq=100): self.log_freq = log_freq self.stopping_object = stopping_object - def on_epoch_end(self, epoch, logs=None): - """ Function to be called at the end of every epoch """ + def on_step_end(self, epoch, logs=None): + """Function to be called at the end of every epoch""" print_stats = ((epoch + 1) % self.log_freq) == 0 # Note that the input logs correspond to the fit before the weights are updated + logs = self.correct_logs(logs) self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats) if self.stopping_object.stop_here(): self.model.stop_training = True @@ -92,7 +137,7 @@ def on_train_end(self, logs=None): self.stopping_object.make_stop() -class LagrangeCallback(Callback): +class LagrangeCallback(CallbackStep): """ Updates the given datasets with its respective multipliers each ``update_freq`` epochs @@ -117,7 +162,7 @@ def __init__(self, datasets, multipliers, update_freq=100): self.updateable_weights = [] def on_train_begin(self, logs=None): - """ Save an instance of all relevant layers """ + """Save an instance of all relevant layers""" for layer_name in self.datasets: layer = self.model.get_layer(layer_name) self.updateable_weights.append(layer.weights) @@ -132,8 +177,8 @@ def _update_weights(self): for w in ws: w.assign(w * multiplier) - def on_epoch_end(self, epoch, logs=None): - """ Function to be called at the end of every epoch """ + def on_step_end(self, epoch, logs=None): + """Function to be called at the end of every epoch""" if (epoch + 1) % self.update_freq == 0: self._update_weights() diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py new file mode 100644 index 
0000000000..8640a72c00
--- /dev/null
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -0,0 +1,177 @@
+from typing import List
+
+import tensorflow as tf
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.layers import Dense, Dropout
+
+
+class MultiDense(Dense):
+    """
+    Dense layer for multiple replicas at the same time.
+
+    For all layers after the first, the input to this layer already contains multiple replicas.
+    In this case, the `replica_input` argument should be set to `True`, which is the default.
+    The input shape in this case is (batch_size, replicas, gridsize, features).
+    For the first layer, there are no replicas yet, and so the `replica_input` argument
+    should be set to `False`.
+    The input shape in this case is (batch_size, gridsize, features).
+
+    Weights are initialized using a `replica_seeds` list of seeds, and are identical to the
+    weights of a list of single dense layers with the same `replica_seeds`.
+
+
+    Example
+    -------
+
+    >>> from tensorflow.keras import Sequential
+    >>> from tensorflow.keras.layers import Dense
+    >>> from tensorflow.keras.initializers import GlorotUniform
+    >>> import tensorflow as tf
+    >>> replicas = 2
+    >>> multi_dense_model = Sequential([
+    >>>     MultiDense(units=8, replica_seeds=[42, 43], replica_input=False, kernel_initializer=GlorotUniform(seed=0)),
+    >>>     MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=0)),
+    >>> ])
+    >>> single_models = [
+    >>>     Sequential([
+    >>>         Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
+    >>>         Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r)),
+    >>>     ])
+    >>>     for r in range(replicas)
+    >>> ]
+    >>> gridsize, features = 100, 2
+    >>> multi_dense_model.build(input_shape=(None, gridsize, features))
+    >>> for single_model in single_models:
+    >>>     single_model.build(input_shape=(None, gridsize, features))
+    >>> test_input = tf.random.uniform(shape=(1, gridsize, features))
+    >>> multi_dense_output = multi_dense_model(test_input)
+    >>> single_dense_output = tf.stack([single_model(test_input) for single_model in single_models], axis=1)
+    >>> tf.reduce_all(tf.equal(multi_dense_output, single_dense_output))
+
+    Parameters
+    ----------
+    replica_seeds: List[int]
+        List of seeds per replica for the kernel initializer.
+    kernel_initializer: Initializer
+        Initializer class for the kernel.
+    replica_input: bool (default: True)
+        Whether the input already contains multiple replicas.
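+
+    Note that the forward pass evaluates all replicas in a single einsum call
+    (``brnf,rfg->brng`` when the input carries a replica axis) rather than looping
+    over a list of per-replica Dense layers.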
+ """ + + def __init__( + self, + replica_seeds: List[int], + kernel_initializer: Initializer, + replica_input: bool = True, + **kwargs + ): + super().__init__(**kwargs) + self.replicas = len(replica_seeds) + self.replica_seeds = replica_seeds + self.kernel_initializer = MultiInitializer( + single_initializer=kernel_initializer, replica_seeds=replica_seeds + ) + self.bias_initializer = MultiInitializer( + single_initializer=self.bias_initializer, replica_seeds=replica_seeds + ) + self.replica_input = replica_input + + def build(self, input_shape): + input_dim = input_shape[-1] + self.kernel = self.add_weight( + name="kernel", + shape=(self.replicas, input_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.replicas, 1, self.units), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.input_spec.axes = {-1: input_dim} + self.built = True + + def call(self, inputs): + """ + Compute output of shape (batch_size, replicas, gridsize, units). + + For the first layer, (self.replica_input is False), this is equivalent to + applying each replica separately and concatenating along the last axis. + If the input already contains multiple replica outputs, it is equivalent + to applying each replica to its corresponding input. + """ + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = tf.cast(inputs, dtype=self._compute_dtype_object) + + input_axes = 'brnf' if self.replica_input else 'bnf' + einrule = input_axes + ',rfg->brng' + outputs = tf.einsum(einrule, inputs, self.kernel) + + # Reshape the output back to the original ndim of the input. + if not tf.executing_eagerly(): + output_shape = self.compute_output_shape(inputs.shape.as_list()) + outputs.set_shape(output_shape) + + if self.use_bias: + outputs = outputs + self.bias + + if self.activation is not None: + outputs = self.activation(outputs) + + return outputs + + def compute_output_shape(self, input_shape): + # Remove the replica axis from the input shape. + if self.replica_input: + input_shape = input_shape[:1] + input_shape[2:] + + output_shape = super().compute_output_shape(input_shape) + + # Add back the replica axis to the output shape. + output_shape = output_shape[:1] + [self.replicas] + output_shape[1:] + + return output_shape + + def get_config(self): + config = super().get_config() + config.update({"replica_input": self.replica_input, "replica_seeds": self.replica_seeds}) + return config + + +class MultiInitializer(Initializer): + """ + Multi replica initializer that exactly replicates a stack of single replica initializers. + + Weights are stacked on the first axis, and per replica seeds are added to a base seed of the + given single replica initializer. + + Parameters + ---------- + single_initializer: Initializer + Initializer class for the kernel. + replica_seeds: List[int] + List of seeds per replica for the kernel initializer. + """ + + def __init__(self, single_initializer: Initializer, replica_seeds: List[int]): + self.single_initializer = single_initializer + self.base_seed = single_initializer.seed if hasattr(single_initializer, "seed") else None + self.replica_seeds = replica_seeds + + def __call__(self, shape, dtype=None, **kwargs): + shape = shape[1:] # Remove the replica axis from the shape. 
+ per_replica_weights = [] + for replica_seed in self.replica_seeds: + if self.base_seed is not None: + self.single_initializer.seed = self.base_seed + replica_seed + + per_replica_weights.append(self.single_initializer(shape, dtype, **kwargs)) + + return tf.stack(per_replica_weights, axis=0) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 7b509f0065..d93f3b1d9a 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -108,12 +108,29 @@ def check_initializer(initializer): raise CheckError(f"Initializer {initializer} not accepted by {MetaLayer}") +def check_layer_type_implemented(parameters): + """Checks whether the layer_type is implemented""" + layer_type = parameters.get("layer_type") + implemented_types = ["dense", "dense_per_flavour"] + if layer_type not in implemented_types: + raise CheckError( + f"Layer type {layer_type} not implemented, must be one of {implemented_types}" + ) + + def check_dropout(parameters): """Checks the dropout setup (positive and smaller than 1.0)""" dropout = parameters.get("dropout") if dropout is not None and not 0.0 <= dropout <= 1.0: raise CheckError(f"Dropout must be between 0 and 1, got: {dropout}") + layer_type = parameters.get("layer_type") + if dropout is not None and dropout > 0.0 and layer_type == "dense_per_flavour": + raise CheckError( + "Dropout is not compatible with the dense_per_flavour layer type, " + "please use instead `parameters::layer_type: dense`" + ) + def check_tensorboard(tensorboard): """Check that the tensorbard callback can be enabled correctly""" @@ -168,6 +185,7 @@ def wrapper_check_NN(basis, tensorboard, save, load, parameters): check_consistent_layers(parameters) check_basis_with_layers(basis, parameters) check_stopping(parameters) + check_layer_type_implemented(parameters) check_dropout(parameters) check_lagrange_multipliers(parameters, "integrability") check_lagrange_multipliers(parameters, "positivity") @@ -367,8 +385,8 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica "Replicas cannot be run in parallel with different training/validation " " masks, please set `same_trvl_per_replica` to True in the runcard" ) - if parameters.get("layer_type") != "dense": - raise CheckError("Parallelization has only been tested with layer_type=='dense'") + if parameters.get("layer_type") == "dense_per_flavour": + raise CheckError("Parallelization has not been tested with layer_type=='dense_per_flavour'") @make_argcheck @@ -409,10 +427,9 @@ def check_fiatlux_pdfs_id(replicas, fiatlux): f"Cannot generate a photon replica with id larger than the number of replicas of the PDFs set {luxset.name}:\nreplica id={max_id}, replicas of {luxset.name} = {pdfs_ids}" ) + @make_argcheck def check_multireplica_qed(replicas, fiatlux): if fiatlux is not None: if len(replicas) > 1: - raise CheckError( - "At the moment, running a multireplica QED fits is not allowed." 
- ) + raise CheckError("At the moment, running a multireplica QED fits is not allowed.") diff --git a/n3fit/src/n3fit/layers/msr_normalization.py b/n3fit/src/n3fit/layers/msr_normalization.py index 0755cad0b4..01a3648cb7 100644 --- a/n3fit/src/n3fit/layers/msr_normalization.py +++ b/n3fit/src/n3fit/layers/msr_normalization.py @@ -40,6 +40,7 @@ def __init__(self, mode: str = "ALL", replicas: int = 1, **kwargs): else: raise ValueError(f"Mode {mode} not accepted for sum rules") + self.replicas = replicas indices = [] self.divisor_indices = [] if self._msr_enabled: @@ -83,6 +84,7 @@ def call(self, pdf_integrated, photon_integral): reshape = lambda x: op.transpose(x[0]) y = reshape(pdf_integrated) photon_integral = reshape(photon_integral) + numerators = [] if self._msr_enabled: @@ -96,8 +98,9 @@ def call(self, pdf_integrated, photon_integral): divisors = op.gather(y, self.divisor_indices, axis=0) # Fill in the rest of the flavours with 1 + num_flavours = y.shape[0] norm_constants = op.scatter_to_one( - numerators / divisors, indices=self.indices, output_shape=y.shape + numerators / divisors, indices=self.indices, output_shape=(num_flavours, self.replicas) ) return op.batchit(op.transpose(norm_constants), batch_dimension=1) diff --git a/n3fit/src/n3fit/layers/preprocessing.py b/n3fit/src/n3fit/layers/preprocessing.py index 77ea760607..f8ab1f8f55 100644 --- a/n3fit/src/n3fit/layers/preprocessing.py +++ b/n3fit/src/n3fit/layers/preprocessing.py @@ -33,6 +33,8 @@ class Preprocessing(MetaLayer): Whether large x preprocessing factor should be active seed: int seed for the initializer of the random alpha and beta values + num_replicas: int (default 1) + The number of replicas """ def __init__( @@ -40,6 +42,7 @@ def __init__( flav_info: Optional[list] = None, seed: int = 0, large_x: bool = True, + num_replicas: int = 1, **kwargs, ): if flav_info is None: @@ -49,6 +52,8 @@ def __init__( self.flav_info = flav_info self.seed = seed self.large_x = large_x + self.num_replicas = num_replicas + self.alphas = [] self.betas = [] super().__init__(**kwargs) @@ -87,7 +92,7 @@ def generate_weight(self, name: str, kind: str, dictionary: dict, set_to_zero: b # Generate the new trainable (or not) parameter newpar = self.builder_helper( name=name, - kernel_shape=(1,), + kernel_shape=(self.num_replicas, 1), initializer=initializer, trainable=trainable, constraint=constraint, @@ -117,9 +122,12 @@ def call(self, x): Returns ------- - prefactor: tensor(shape=[1,N,F]) + prefactor: tensor(shape=[1,R,N,F]) """ - alphas = op.stack(self.alphas, axis=1) - betas = op.stack(self.betas, axis=1) + # weight tensors of shape (R, 1, F) + alphas = op.stack(self.alphas, axis=-1) + betas = op.stack(self.betas, axis=-1) + + x = op.batchit(x, batch_dimension=0) return x ** (1 - alphas) * (1 - x) ** betas diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index ae6ed01182..5a69fe9396 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -14,7 +14,16 @@ import numpy as np -from n3fit.backends import Input, Lambda, MetaLayer, MetaModel, base_layer_selector +from n3fit.backends import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + Input, + Lambda, + MetaLayer, + MetaModel, + base_layer_selector, +) from n3fit.backends import operations as op from n3fit.backends import regularizer_selector from n3fit.layers import ( @@ -299,103 +308,6 @@ def observable_generator( return layer_info -# Network generation functions -def generate_dense_network( - nodes_in: int, - nodes: 
int, - activations: List[str], - initializer_name: str = "glorot_normal", - seed: int = 0, - dropout_rate: float = 0.0, - regularizer: str = None, -): - """ - Generates a dense network - - the dropout rate, if selected, is set - for the next to last layer (i.e., the last layer of the dense network before getting to - the output layer for the basis choice) - """ - list_of_pdf_layers = [] - number_of_layers = len(nodes) - if dropout_rate > 0: - dropout_layer = number_of_layers - 2 - else: - dropout_layer = -1 - for i, (nodes_out, activation) in enumerate(zip(nodes, activations)): - # if we have dropout set up, add it to the list - if dropout_rate > 0 and i == dropout_layer: - list_of_pdf_layers.append(base_layer_selector("dropout", rate=dropout_rate)) - - # select the initializer and move the seed - init = MetaLayer.select_initializer(initializer_name, seed=seed + i) - - # set the arguments that will define the layer - arguments = { - "kernel_initializer": init, - "units": int(nodes_out), - "activation": activation, - "input_shape": (nodes_in,), - "kernel_regularizer": regularizer, - } - - layer = base_layer_selector("dense", **arguments) - - list_of_pdf_layers.append(layer) - nodes_in = int(nodes_out) - return list_of_pdf_layers - - -def generate_dense_per_flavour_network( - nodes_in, nodes, activations, initializer_name="glorot_normal", seed=0, basis_size=8 -): - """ - For each flavour generates a dense network of the chosen size - - """ - list_of_pdf_layers = [] - number_of_layers = len(nodes) - current_seed = seed - for i, (nodes_out, activation) in enumerate(zip(nodes, activations)): - initializers = [] - for _ in range(basis_size): - # select the initializer and move the seed - initializers.append(MetaLayer.select_initializer(initializer_name, seed=current_seed)) - current_seed += 1 - - # set the arguments that will define the layer - # but careful, the last layer must be nodes = 1 - # TODO the mismatch is due to the fact that basis_size - # is set to the number of nodes of the last layer when it should - # come from the runcard - if i == number_of_layers - 1: - nodes_out = 1 - arguments = { - "kernel_initializer": initializers, - "units": nodes_out, - "activation": activation, - "input_shape": (nodes_in,), - "basis_size": basis_size, - } - - layer = base_layer_selector("dense_per_flavour", **arguments) - - if i == number_of_layers - 1: - # For the last layer, apply concatenate - concat = base_layer_selector("concatenate") - - def output_layer(ilayer): - result = layer(ilayer) - return concat(result) - - list_of_pdf_layers.append(output_layer) - else: - list_of_pdf_layers.append(layer) - - nodes_in = int(nodes_out) - return list_of_pdf_layers - - def generate_pdf_model( nodes: List[int] = None, activations: List[str] = None, @@ -669,67 +581,50 @@ def pdfNN_layer_generator( else: sumrule_layer = lambda x: x - # Only these layers change from replica to replica: - nn_replicas = [] - preprocessing_factor_replicas = [] - for i_replica, replica_seed in enumerate(seed): - preprocessing_factor_replicas.append( - Preprocessing( - flav_info=flav_info, - input_shape=(1,), - name=f"preprocessing_factor_{i_replica}", - seed=replica_seed + number_of_layers, - large_x=not subtract_one, - ) - ) - nn_replicas.append( - generate_nn( - layer_type=layer_type, - input_dimensions=nn_input_dimensions, - nodes=nodes, - activations=activations, - initializer_name=initializer_name, - replica_seed=replica_seed, - dropout=dropout, - regularizer=regularizer, - regularizer_args=regularizer_args, - 
last_layer_nodes=last_layer_nodes, - name=f"NN_{i_replica}", - ) - ) + compute_preprocessing_factor = Preprocessing( + flav_info=flav_info, + input_shape=(1,), + name=PREPROCESSING_LAYER_ALL_REPLICAS, + seed=seed[0] + number_of_layers, + large_x=not subtract_one, + num_replicas=num_replicas, + ) - # Apply NN layers for all replicas to a given input grid - def neural_network_replicas(x, postfix=""): - NNs_x = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}")( - [nn(x) for nn in nn_replicas] - ) + nn_replicas = generate_nn( + layer_type=layer_type, + nodes_in=nn_input_dimensions, + nodes=nodes, + activations=activations, + initializer_name=initializer_name, + replica_seeds=seed, + dropout=dropout, + regularizer=regularizer, + regularizer_args=regularizer_args, + last_layer_nodes=last_layer_nodes, + ) + + # The NN subtracted by NN(1), if applicable + def nn_subtracted(x): + NNs_x = nn_replicas(x) if subtract_one: x_eq_1_processed = process_input(layer_x_eq_1) - NNs_x_1 = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}_x_1")( - [nn(x_eq_1_processed) for nn in nn_replicas] - ) + NNs_x_1 = nn_replicas(x_eq_1_processed) NNs_x = subtract_one_layer([NNs_x, NNs_x_1]) return NNs_x - # Apply preprocessing factors for all replicas to a given input grid - def preprocessing_replicas(x, postfix=""): - return Lambda(lambda pfs: op.stack(pfs, axis=1), name=f"prefactors{postfix}")( - [pf(x) for pf in preprocessing_factor_replicas] - ) - - def compute_unnormalized_pdf(x, postfix=""): + def compute_unnormalized_pdf(x): # Preprocess the input grid x_nn_input = extract_nn_input(x) x_processed = process_input(x_nn_input) x_original = extract_original(x) # Compute the neural network output - NNs_x = neural_network_replicas(x_processed, postfix=postfix) + NNs_x = nn_subtracted(x_processed) # Compute the preprocessing factor - preprocessing_factors_x = preprocessing_replicas(x_original, postfix=postfix) + preprocessing_factors_x = compute_preprocessing_factor(x_original) # Apply the preprocessing factor pref_NNs_x = apply_preprocessing_factor([preprocessing_factors_x, NNs_x]) @@ -746,7 +641,7 @@ def compute_unnormalized_pdf(x, postfix=""): PDFs_unnormalized = compute_unnormalized_pdf(pdf_input) if impose_sumrule: - PDFs_integration_grid = compute_unnormalized_pdf(integrator_input, postfix="_x_integ") + PDFs_integration_grid = compute_unnormalized_pdf(integrator_input) if photons: # add batch and flavor dimensions @@ -770,54 +665,150 @@ def compute_unnormalized_pdf(x, postfix=""): if photons: PDFs = layer_photon(PDFs) - if replica_axis: - pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) - else: - pdf_model = MetaModel(model_input, PDFs[:, 0], name=f"PDFs", scaler=scaler) + if not replica_axis: + PDFs = Lambda(lambda pdfs: pdfs[:, 0], name="remove_replica_axis")(PDFs) + pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) return pdf_model def generate_nn( layer_type: str, - input_dimensions: int, + nodes_in: int, nodes: List[int], activations: List[str], initializer_name: str, - replica_seed: int, + replica_seeds: List[int], dropout: float, regularizer: str, regularizer_args: dict, last_layer_nodes: int, - name: str, ) -> MetaModel: """ Create the part of the model that contains all of the actual neural network - layers. + layers, for each replica. + + Parameters + ---------- + layer_type: str + Type of layer to use. Can be "dense" or "dense_per_flavour". + nodes_in: int + Number of nodes in the input layer. 
+ nodes: List[int] + Number of nodes in each hidden layer. + activations: List[str] + Activation function to use in each hidden layer. + initializer_name: str + Name of the initializer to use. + replica_seeds: List[int] + List of seeds to use for each replica. + dropout: float + Dropout rate to use (if 0, no dropout is used). + regularizer: str + Name of the regularizer to use. + regularizer_args: dict + Arguments to pass to the regularizer. + last_layer_nodes: int + Number of nodes in the last layer. + + Returns + ------- + nn_replicas: MetaModel + Single model containing all replicas. """ - common_args = { - 'nodes_in': input_dimensions, - 'nodes': nodes, - 'activations': activations, - 'initializer_name': initializer_name, - 'seed': replica_seed, - } - if layer_type == "dense": + nodes_list = list(nodes) # so we can modify it + x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed') + + custom_args = {} + if layer_type == "dense_per_flavour": + # set the arguments that will define the layer + # but careful, the last layer must be nodes = 1 + # TODO the mismatch is due to the fact that basis_size + # is set to the number of nodes of the last layer when it should + # come from the runcard + nodes_list[-1] = 1 + basis_size = last_layer_nodes + custom_args['basis_size'] = basis_size + + def initializer_generator(seed, i_layer): + seed += i_layer * basis_size + initializers = [ + MetaLayer.select_initializer(initializer_name, seed=seed + b) + for b in range(basis_size) + ] + return initializers + + else: # "dense" reg = regularizer_selector(regularizer, **regularizer_args) - list_of_pdf_layers = generate_dense_network( - **common_args, dropout_rate=dropout, regularizer=reg - ) - elif layer_type == "dense_per_flavour": - list_of_pdf_layers = generate_dense_per_flavour_network( - **common_args, basis_size=last_layer_nodes - ) + custom_args['regularizer'] = reg + + def initializer_generator(seed, i_layer): + seed += i_layer + return MetaLayer.select_initializer(initializer_name, seed=seed) + + # First create all the layers... 
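+    # For "dense", a single MultiDense layer per depth holds the weights of all
+    # replicas at once; for "dense_per_flavour", a separate layer is built for each
+    # replica at every depth.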
+    # list_of_pdf_layers[d][r] is the layer at depth d for replica r
+    list_of_pdf_layers = []
+    for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
+        if layer_type == "dense":
+            layers = base_layer_selector(
+                layer_type,
+                replica_seeds=replica_seeds,
+                kernel_initializer=initializer_generator(0, i_layer),
+                units=nodes_out,
+                activation=activation,
+                replica_input=(i_layer != 0),
+                **custom_args,
+            )
+        else:
+            layers = [
+                base_layer_selector(
+                    layer_type,
+                    kernel_initializer=initializer_generator(replica_seed, i_layer),
+                    units=nodes_out,
+                    activation=activation,
+                    input_shape=(nodes_in,),
+                    **custom_args,
+                )
+                for replica_seed in replica_seeds
+            ]
+        list_of_pdf_layers.append(layers)
+        nodes_in = int(nodes_out)
+
+    # add dropout as second to last layer
+    if dropout > 0:
+        dropout_layer = base_layer_selector("dropout", rate=dropout)
+        list_of_pdf_layers.insert(-2, dropout_layer)
+
+    # In case of per flavour network, concatenate at the last layer
+    # (bind ``layer`` as a default argument: a bare closure in the comprehension
+    # would late-bind and leave every entry pointing to the last layer)
+    if layer_type == "dense_per_flavour":
+        concat = base_layer_selector("concatenate")
+        list_of_pdf_layers[-1] = [
+            lambda x, layer=layer: concat(layer(x)) for layer in list_of_pdf_layers[-1]
+        ]
+
+    # Apply all layers to the input to create the models
+    if layer_type == "dense":
+        pdfs = x_input
+        for layer in list_of_pdf_layers:
+            pdfs = layer(pdfs)
+        model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS)
+
+        return model
+
+    pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
+
+    for layers in list_of_pdf_layers[1:]:
+        # Since some layers (dropout) are shared, we have to treat them separately
+        if type(layers) is list:
+            pdfs = [layer(x) for layer, x in zip(layers, pdfs)]
+        else:
+            pdfs = [layers(x) for x in pdfs]
-    # Note: using a Sequential model would be more appropriate, but it would require
-    # creating a MetaSequential model.
- x = Input(shape=(None, input_dimensions), batch_size=1, name='xgrids_processed') - pdf = x - for layer in list_of_pdf_layers: - pdf = layer(pdf) + # Wrap the pdfs in a MetaModel to enable getting/setting of weights later + pdfs = [ + MetaModel({'NN_input': x_input}, pdf, name=f"{NN_PREFIX}_{i_replica}")(x_input) + for i_replica, pdf in enumerate(pdfs) + ] + pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name=f"stack_replicas")(pdfs) + model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS) - model = MetaModel({'NN_input': x}, pdf, name=name) return model diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 99e9f013db..acbcc8b3cd 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -17,6 +17,7 @@ from n3fit import model_gen from n3fit.backends import MetaModel, callbacks, clear_backend_state from n3fit.backends import operations as op +from n3fit.backends import NN_LAYER_ALL_REPLICAS import n3fit.hyper_optimization.penalties import n3fit.hyper_optimization.rewards from n3fit.scaler import generate_scaler @@ -454,7 +455,7 @@ def _model_generation(self, xinput, pdf_model, partition, partition_idx): training.summary() pdf_model = training.get_layer("PDFs") pdf_model.summary() - nn_model = pdf_model.get_layer("NN_0") + nn_model = pdf_model.get_layer(NN_LAYER_ALL_REPLICAS) nn_model.summary() # We may have fits without sumrules imposed try: diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5 index 2c5b02e71e..9ffdaea7f3 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ diff --git a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5 index e5a8adea86..18c593da9e 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ diff --git a/n3fit/src/n3fit/tests/test_modelgen.py b/n3fit/src/n3fit/tests/test_modelgen.py index 4cec4574ee..c87523f838 100644 --- a/n3fit/src/n3fit/tests/test_modelgen.py +++ b/n3fit/src/n3fit/tests/test_modelgen.py @@ -5,62 +5,50 @@ It checks that both the number of layers and the shape of the weights of the layers are what is expected """ -import numpy as np -import n3fit.model_gen -from n3fit.backends import MetaModel -from n3fit.backends import operations as op +from n3fit.backends import NN_PREFIX +from n3fit.model_gen import generate_nn INSIZE = 16 OUT_SIZES = (4, 3) BASIS_SIZE = 3 +COMMON_ARGS = { + "nodes_in": INSIZE, + "nodes": OUT_SIZES, + "activations": ["sigmoid", "tanh"], + "initializer_name": "glorot_uniform", + "replica_seeds": [0], + "dropout": 0.0, + "regularizer": None, + "regularizer_args": {}, + "last_layer_nodes": BASIS_SIZE, +} + def test_generate_dense_network(): - nodes_in = INSIZE - nodes_out = OUT_SIZES - activations = ["sigmoid", "tanh"] - layers = n3fit.model_gen.generate_dense_network(nodes_in, nodes_out, activations) - arr = np.random.rand(1, INSIZE) - input_layer = op.numpy_to_input(arr) - curr_layer = input_layer - for layer in layers: - curr_layer = layer(curr_layer) - modelito = MetaModel({"input": input_layer}, curr_layer) + nn = generate_nn("dense", **COMMON_ARGS) + # The number of layers should be input layer + len(OUT_SIZES) - assert len(modelito.layers) == len(OUT_SIZES) + 1 + assert len(nn.layers) == len(OUT_SIZES) + 1 # Check that the number of parameters is as expected - # We expect 4 weights where the 
two first ones are - # (INSIZE, OUT_SIZE[0]) (OUT_SIZE[0],) - # and the second one - # (OUT_SIZE[0], OUT_SIZE[1]) (OUT_SIZE[1],) expected_sizes = [ - (INSIZE, OUT_SIZES[0]), - (OUT_SIZES[0],), - OUT_SIZES, - (OUT_SIZES[1],), + (1, INSIZE, OUT_SIZES[0]), + (1, 1, OUT_SIZES[0]), + (1, *OUT_SIZES), + (1, 1, OUT_SIZES[1]), ] - for weight, esize in zip(modelito.weights, expected_sizes): + for weight, esize in zip(nn.weights, expected_sizes): assert weight.shape == esize def test_generate_dense_per_flavour_network(): - nodes_in = INSIZE - nodes_out = OUT_SIZES - activations = ["sigmoid", "tanh"] - layers = n3fit.model_gen.generate_dense_per_flavour_network( - nodes_in, nodes_out, activations, basis_size=BASIS_SIZE - ) - arr = np.random.rand(1, INSIZE) - input_layer = op.numpy_to_input(arr) - curr_layer = input_layer - for layer in layers: - curr_layer = layer(curr_layer) - modelito = MetaModel({"input": input_layer}, curr_layer) + nn = generate_nn("dense_per_flavour", **COMMON_ARGS).get_layer(f"{NN_PREFIX}_0") + # The number of layers should be input + BASIS_SIZE*len(OUT_SIZES) + concatenate - assert len(modelito.layers) == BASIS_SIZE * len(OUT_SIZES) + 2 + assert len(nn.layers) == BASIS_SIZE * len(OUT_SIZES) + 2 # The shape for this network of denses for flavours will depend on the basis_size expected_sizes = [] expected_sizes += BASIS_SIZE * [(INSIZE, OUT_SIZES[0]), (OUT_SIZES[0],)] expected_sizes += BASIS_SIZE * [(OUT_SIZES[0], 1), (1,)] - for weight, esize in zip(modelito.weights, expected_sizes): + for weight, esize in zip(nn.weights, expected_sizes): assert weight.shape == esize diff --git a/n3fit/src/n3fit/tests/test_multidense.py b/n3fit/src/n3fit/tests/test_multidense.py new file mode 100644 index 0000000000..b912c0bd82 --- /dev/null +++ b/n3fit/src/n3fit/tests/test_multidense.py @@ -0,0 +1,68 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import Sequential +from tensorflow.keras.initializers import GlorotUniform +from tensorflow.keras.layers import Dense + +from n3fit.backends.keras_backend.multi_dense import MultiDense +from n3fit.model_gen import generate_nn + + +def test_multidense(): + replicas = 2 + multi_dense_model = Sequential( + [ + MultiDense( + units=8, + replica_seeds=[42, 43], + replica_input=False, + kernel_initializer=GlorotUniform(seed=0), + ), + MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=100)), + ] + ) + single_models = [ + Sequential( + [ + Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)), + Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)), + ] + ) + for r in range(replicas) + ] + + gridsize, features = 100, 3 + multi_dense_model.build(input_shape=(None, gridsize, features)) + for single_model in single_models: + single_model.build(input_shape=(None, gridsize, features)) + + test_input = tf.random.uniform(shape=(1, gridsize, features)) + multi_dense_output = multi_dense_model(test_input) + single_dense_output = tf.stack( + [single_model(test_input) for single_model in single_models], axis=1 + ) + + np.testing.assert_allclose(multi_dense_output, single_dense_output, atol=1e-6, rtol=1e-4) + + +def test_initializers(): + input_shape = (None, 3, 1) + dense_layers = [] + for r in range(2): + dense_layer = Dense(units=2, kernel_initializer=GlorotUniform(seed=42 + r)) + dense_layer.build(input_shape=input_shape) + dense_layers.append(dense_layer) + stacked_weights = tf.stack([dense_layer.weights[0] for dense_layer in dense_layers], axis=0) + + multi_dense_layer = MultiDense( + 
units=2, + replica_seeds=[0, 1], + replica_input=False, + kernel_initializer=GlorotUniform(seed=42), + ) + multi_dense_layer.build(input_shape=input_shape) + + multi_dense_weights = multi_dense_layer.weights[0].numpy() + stacked_weights = stacked_weights.numpy() + + np.testing.assert_allclose(multi_dense_weights, stacked_weights) diff --git a/n3fit/src/n3fit/tests/test_preprocessing.py b/n3fit/src/n3fit/tests/test_preprocessing.py index 3bf6a8966c..0280011343 100644 --- a/n3fit/src/n3fit/tests/test_preprocessing.py +++ b/n3fit/src/n3fit/tests/test_preprocessing.py @@ -22,45 +22,47 @@ def test_preprocessing(): test_prefactors = [ [ [ - 3.7446213e-01, - 1.9785003e-01, - 2.7931085e-01, - 2.0784079e-01, - 4.5369801e-01, - 2.7796263e-01, - 5.4610312e-01, - 2.4907256e-02, - ], - [ - 6.2252983e-04, - 3.0504008e-05, - 4.5713778e-03, - 1.0905267e-03, - 4.0506415e-02, - 5.9004971e-05, - 4.5114113e-03, - 2.6757403e-09, - ], - [ - 4.1631009e-02, - 1.0586979e-02, - 8.3202787e-02, - 4.3506064e-02, - 2.2559988e-01, - 1.5161950e-02, - 1.0105091e-01, - 1.4808348e-04, - ], - [ - 1.1616933e-01, - 4.2717375e-02, - 1.5620175e-01, - 9.7478621e-02, - 3.2600221e-01, - 5.8901049e-02, - 2.1937098e-01, - 1.8343410e-03, - ], + [ + 3.7446213e-01, + 1.9785003e-01, + 2.7931085e-01, + 2.0784079e-01, + 4.5369801e-01, + 2.7796263e-01, + 5.4610312e-01, + 2.4907256e-02, + ], + [ + 6.2252983e-04, + 3.0504008e-05, + 4.5713778e-03, + 1.0905267e-03, + 4.0506415e-02, + 5.9004971e-05, + 4.5114113e-03, + 2.6757403e-09, + ], + [ + 4.1631009e-02, + 1.0586979e-02, + 8.3202787e-02, + 4.3506064e-02, + 2.2559988e-01, + 1.5161950e-02, + 1.0105091e-01, + 1.4808348e-04, + ], + [ + 1.1616933e-01, + 4.2717375e-02, + 1.5620175e-01, + 9.7478621e-02, + 3.2600221e-01, + 5.8901049e-02, + 2.1937098e-01, + 1.8343410e-03, + ], + ] ] ] prefactors = prepro(test_x) diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index 7e2e754e0f..12e5aca374 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -24,6 +24,7 @@ import numpy as np import numpy.linalg as la +from n3fit.backends import PREPROCESSING_LAYER_ALL_REPLICAS from validphys.arclength import arc_lengths, integrability_number from validphys.core import PDF, MCStats from validphys.lhapdfset import LHAPDFSet @@ -224,13 +225,7 @@ def get_preprocessing_factors(self, replica=None): if replica is None: replica = 1 # Replicas start counting in 1 so: - preprocessing_layers = self._models[replica - 1].get_layer_re(r"preprocessing_factor_\d") - if len(preprocessing_layers) > 1: - # We really don't want to fail at this point, but print a warning at least... - log.warning("More than one preprocessing layer found within the model!") - elif len(preprocessing_layers) < 1: - log.warning("No preprocessing layer found within the model!") - preprocessing_layer = preprocessing_layers[0] + preprocessing_layer = self._models[replica - 1].get_layer(PREPROCESSING_LAYER_ALL_REPLICAS) alphas_and_betas = None if self.fit_basis is not None: