diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5 index 542fca06f9..385ea55a75 100644 Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py index f49bbd0f53..3c48317a86 100644 --- a/n3fit/src/n3fit/backends/__init__.py +++ b/n3fit/src/n3fit/backends/__init__.py @@ -1,20 +1,23 @@ -from n3fit.backends.keras_backend.internal_state import ( - set_initial_state, - clear_backend_state, - set_eager -) +from n3fit.backends.keras_backend import callbacks, constraints, operations from n3fit.backends.keras_backend.MetaLayer import MetaLayer -from n3fit.backends.keras_backend.MetaModel import MetaModel +from n3fit.backends.keras_backend.MetaModel import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + MetaModel, +) from n3fit.backends.keras_backend.base_layers import ( + Concatenate, Input, - concatenate, Lambda, base_layer_selector, + concatenate, regularizer_selector, - Concatenate, ) -from n3fit.backends.keras_backend import operations -from n3fit.backends.keras_backend import constraints -from n3fit.backends.keras_backend import callbacks +from n3fit.backends.keras_backend.internal_state import ( + clear_backend_state, + set_eager, + set_initial_state, +) print("Using Keras backend") diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 1b0990bb03..fde7c4f987 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -46,7 +46,8 @@ } NN_PREFIX = "NN" -PREPROCESSING_PREFIX = "preprocessing_factor" +NN_LAYER_ALL_REPLICAS = "all_NNs" +PREPROCESSING_LAYER_ALL_REPLICAS = "preprocessing_factor" # Some keys need to work for everyone for k, v in optimizers.items(): @@ -156,7 +157,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): of the model (the loss functions) to the partial losses. If the model was compiled with input and output data, they will not be passed through. - In this case by default the number of `epochs` will be set to 1 + In this case by default the number of ``epochs`` will be set to 1 ex: {'loss': [100], 'dataset_a_loss1' : [67], 'dataset_2_loss': [33]} @@ -228,7 +229,7 @@ def compile( ): """ Compile the model given an optimizer and a list of loss functions. - The optimizer must be one of those implemented in the `optimizer` attribute of this class. + The optimizer must be one of those implemented in the ``optimizer`` attribute of this class. Options: - A learning rate and a list of target outpout can be defined. @@ -353,14 +354,10 @@ def get_replica_weights(self, i_replica): dict dictionary with the weights of the replica """ - NN_weights = [ - tf.Variable(w, name=w.name) for w in self.get_layer(f"{NN_PREFIX}_{i_replica}").weights - ] - prepro_weights = [ - tf.Variable(w, name=w.name) - for w in self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").weights - ] - weights = {NN_PREFIX: NN_weights, PREPROCESSING_PREFIX: prepro_weights} + weights = {} + for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]: + layer = self.get_layer(layer_type) + weights[layer_type] = get_layer_replica_weights(layer, i_replica) return weights @@ -378,10 +375,9 @@ def set_replica_weights(self, weights, i_replica=0): i_replica: int the replica number to set, defaulting to 0 """ - self.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights[NN_PREFIX]) - self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").set_weights( - weights[PREPROCESSING_PREFIX] - ) + for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]: + layer = self.get_layer(layer_type) + set_layer_replica_weights(layer=layer, weights=weights[layer_type], i_replica=i_replica) def split_replicas(self): """ @@ -411,51 +407,85 @@ def load_identical_replicas(self, model_file): """ From a single replica model, load the same weights into all replicas. """ - weights = self._format_weights_from_file(model_file) + single_replica = self.single_replica_generator() + single_replica.load_weights(model_file) + weights = single_replica.get_replica_weights(0) for i_replica in range(self.num_replicas): self.set_replica_weights(weights, i_replica) - def _format_weights_from_file(self, model_file): - """Read weights from a .h5 file and format into a dictionary of tf.Variables""" - weights = {} - with h5py.File(model_file, 'r') as f: - # look at layers of the form NN_i and take the lowest i - i_replica = 0 - while f"{NN_PREFIX}_{i_replica}" not in f: - i_replica += 1 +def is_stacked_single_replicas(layer): + """ + Check if the layer consists of stacked single replicas (Only happens for NN layers), + to determine how to extract single replica weights. - weights[NN_PREFIX] = self._extract_weights( - f[f"{NN_PREFIX}_{i_replica}"], NN_PREFIX, i_replica - ) - weights[PREPROCESSING_PREFIX] = self._extract_weights( - f[f"{PREPROCESSING_PREFIX}_{i_replica}"], PREPROCESSING_PREFIX, i_replica - ) + Parameters + ---------- + layer: MetaLayer + the layer to check - return weights + Returns + ------- + bool + True if the layer consists of stacked single replicas + """ + if not isinstance(layer, MetaModel): + return False + return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers] + + +def get_layer_replica_weights(layer, i_replica: int): + """ + Get the weights for the given single replica ``i_replica``, + from a ``layer`` that contains the weights of all the replicas. + + Note that the layer could be a complete NN with many separated sub_layers + each of which containing weights for all replicas together. + This functions separates the per-replica weights and returns the list of weight as if the + input ``layer`` were made of _only_ replica ``i_replica``. + + Parameters + ---------- + layer: MetaLayer + the layer to get the weights from + i_replica: int + the replica number + + Returns + ------- + weights: list + list of weights for the replica + """ + if is_stacked_single_replicas(layer): + weights = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights + else: + weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights] + + return weights + + +def set_layer_replica_weights(layer, weights, i_replica: int): + """ + Set the weights for the given single replica ``i_replica``. + When the input ``layer`` contains weights for many replicas, ensures that + only those corresponding to replica ``i_replica`` are updated. + + Parameters + ---------- + layer: MetaLayer + the layer to set the weights for + weights: list + list of weights for the replica + i_replica: int + the replica number + """ + if is_stacked_single_replicas(layer): + layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights) + return + + full_weights = [w.numpy() for w in layer.weights] + for w_old, w_new in zip(full_weights, weights): + w_old[i_replica : i_replica + 1] = w_new - def _extract_weights(self, h5_group, weights_key, i_replica): - """Extract weights from a h5py group, turning them into Tensorflow variables""" - weights = [] - - def append_weights(name, node): - if isinstance(node, h5py.Dataset): - weight_name = node.name.split("/", 2)[-1] - weight_name = weight_name.replace(f"{NN_PREFIX}_{i_replica}", f"{NN_PREFIX}_0") - weight_name = weight_name.replace( - f"{PREPROCESSING_PREFIX}_{i_replica}", f"{PREPROCESSING_PREFIX}_0" - ) - weights.append(tf.Variable(node[()], name=weight_name)) - - h5_group.visititems(append_weights) - - # have to put them in the same order - weights_ordered = [] - weights_model_order = [w.name for w in self.get_replica_weights(0)[weights_key]] - for w in weights_model_order: - for w_h5 in weights: - if w_h5.name == w: - weights_ordered.append(w_h5) - - return weights_ordered + layer.set_weights(full_weights) diff --git a/n3fit/src/n3fit/layers/msr_normalization.py b/n3fit/src/n3fit/layers/msr_normalization.py index 0755cad0b4..01a3648cb7 100644 --- a/n3fit/src/n3fit/layers/msr_normalization.py +++ b/n3fit/src/n3fit/layers/msr_normalization.py @@ -40,6 +40,7 @@ def __init__(self, mode: str = "ALL", replicas: int = 1, **kwargs): else: raise ValueError(f"Mode {mode} not accepted for sum rules") + self.replicas = replicas indices = [] self.divisor_indices = [] if self._msr_enabled: @@ -83,6 +84,7 @@ def call(self, pdf_integrated, photon_integral): reshape = lambda x: op.transpose(x[0]) y = reshape(pdf_integrated) photon_integral = reshape(photon_integral) + numerators = [] if self._msr_enabled: @@ -96,8 +98,9 @@ def call(self, pdf_integrated, photon_integral): divisors = op.gather(y, self.divisor_indices, axis=0) # Fill in the rest of the flavours with 1 + num_flavours = y.shape[0] norm_constants = op.scatter_to_one( - numerators / divisors, indices=self.indices, output_shape=y.shape + numerators / divisors, indices=self.indices, output_shape=(num_flavours, self.replicas) ) return op.batchit(op.transpose(norm_constants), batch_dimension=1) diff --git a/n3fit/src/n3fit/layers/preprocessing.py b/n3fit/src/n3fit/layers/preprocessing.py index 77ea760607..f8ab1f8f55 100644 --- a/n3fit/src/n3fit/layers/preprocessing.py +++ b/n3fit/src/n3fit/layers/preprocessing.py @@ -33,6 +33,8 @@ class Preprocessing(MetaLayer): Whether large x preprocessing factor should be active seed: int seed for the initializer of the random alpha and beta values + num_replicas: int (default 1) + The number of replicas """ def __init__( @@ -40,6 +42,7 @@ def __init__( flav_info: Optional[list] = None, seed: int = 0, large_x: bool = True, + num_replicas: int = 1, **kwargs, ): if flav_info is None: @@ -49,6 +52,8 @@ def __init__( self.flav_info = flav_info self.seed = seed self.large_x = large_x + self.num_replicas = num_replicas + self.alphas = [] self.betas = [] super().__init__(**kwargs) @@ -87,7 +92,7 @@ def generate_weight(self, name: str, kind: str, dictionary: dict, set_to_zero: b # Generate the new trainable (or not) parameter newpar = self.builder_helper( name=name, - kernel_shape=(1,), + kernel_shape=(self.num_replicas, 1), initializer=initializer, trainable=trainable, constraint=constraint, @@ -117,9 +122,12 @@ def call(self, x): Returns ------- - prefactor: tensor(shape=[1,N,F]) + prefactor: tensor(shape=[1,R,N,F]) """ - alphas = op.stack(self.alphas, axis=1) - betas = op.stack(self.betas, axis=1) + # weight tensors of shape (R, 1, F) + alphas = op.stack(self.alphas, axis=-1) + betas = op.stack(self.betas, axis=-1) + + x = op.batchit(x, batch_dimension=0) return x ** (1 - alphas) * (1 - x) ** betas diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 09f337dbca..fc180f392f 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -14,7 +14,16 @@ import numpy as np -from n3fit.backends import Input, Lambda, MetaLayer, MetaModel, base_layer_selector +from n3fit.backends import ( + NN_LAYER_ALL_REPLICAS, + NN_PREFIX, + PREPROCESSING_LAYER_ALL_REPLICAS, + Input, + Lambda, + MetaLayer, + MetaModel, + base_layer_selector, +) from n3fit.backends import operations as op from n3fit.backends import regularizer_selector from n3fit.layers import ( @@ -572,18 +581,14 @@ def pdfNN_layer_generator( else: sumrule_layer = lambda x: x - # Only these layers change from replica to replica: - preprocessing_factor_replicas = [] - for i_replica, replica_seed in enumerate(seed): - preprocessing_factor_replicas.append( - Preprocessing( - flav_info=flav_info, - input_shape=(1,), - name=f"preprocessing_factor_{i_replica}", - seed=replica_seed + number_of_layers, - large_x=not subtract_one, - ) - ) + compute_preprocessing_factor = Preprocessing( + flav_info=flav_info, + input_shape=(1,), + name=PREPROCESSING_LAYER_ALL_REPLICAS, + seed=seed[0] + number_of_layers, + large_x=not subtract_one, + num_replicas=num_replicas, + ) nn_replicas = generate_nn( layer_type=layer_type, @@ -598,38 +603,28 @@ def pdfNN_layer_generator( last_layer_nodes=last_layer_nodes, ) - # Apply NN layers for all replicas to a given input grid - def neural_network_replicas(x, postfix=""): - NNs_x = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}")( - [nn(x) for nn in nn_replicas] - ) + # The NN subtracted by NN(1), if applicable + def nn_subtracted(x): + NNs_x = nn_replicas(x) if subtract_one: x_eq_1_processed = process_input(layer_x_eq_1) - NNs_x_1 = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}_x_1")( - [nn(x_eq_1_processed) for nn in nn_replicas] - ) + NNs_x_1 = nn_replicas(x_eq_1_processed) NNs_x = subtract_one_layer([NNs_x, NNs_x_1]) return NNs_x - # Apply preprocessing factors for all replicas to a given input grid - def preprocessing_replicas(x, postfix=""): - return Lambda(lambda pfs: op.stack(pfs, axis=1), name=f"prefactors{postfix}")( - [pf(x) for pf in preprocessing_factor_replicas] - ) - - def compute_unnormalized_pdf(x, postfix=""): + def compute_unnormalized_pdf(x): # Preprocess the input grid x_nn_input = extract_nn_input(x) x_processed = process_input(x_nn_input) x_original = extract_original(x) # Compute the neural network output - NNs_x = neural_network_replicas(x_processed, postfix=postfix) + NNs_x = nn_subtracted(x_processed) # Compute the preprocessing factor - preprocessing_factors_x = preprocessing_replicas(x_original, postfix=postfix) + preprocessing_factors_x = compute_preprocessing_factor(x_original) # Apply the preprocessing factor pref_NNs_x = apply_preprocessing_factor([preprocessing_factors_x, NNs_x]) @@ -646,7 +641,7 @@ def compute_unnormalized_pdf(x, postfix=""): PDFs_unnormalized = compute_unnormalized_pdf(pdf_input) if impose_sumrule: - PDFs_integration_grid = compute_unnormalized_pdf(integrator_input, postfix="_x_integ") + PDFs_integration_grid = compute_unnormalized_pdf(integrator_input) if photons: # add batch and flavor dimensions @@ -670,11 +665,10 @@ def compute_unnormalized_pdf(x, postfix=""): if photons: PDFs = layer_photon(PDFs) - if replica_axis: - pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) - else: - pdf_model = MetaModel(model_input, PDFs[:, 0], name=f"PDFs", scaler=scaler) + if not replica_axis: + PDFs = Lambda(lambda pdfs: pdfs[:, 0], name="remove_replica_axis")(PDFs) + pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) return pdf_model @@ -719,8 +713,8 @@ def generate_nn( Returns ------- - nn_replicas: List[MetaModel] - List of MetaModel objects, one for each replica. + nn_replicas: MetaModel + Single model containing all replicas. """ nodes_list = list(nodes) # so we can modify it x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed') @@ -744,7 +738,7 @@ def initializer_generator(seed, i_layer): ] return initializers - elif layer_type == "dense": + else: # "dense" reg = regularizer_selector(regularizer, **regularizer_args) custom_args['regularizer'] = reg @@ -782,6 +776,7 @@ def initializer_generator(seed, i_layer): # Apply all layers to the input to create the models pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]] + for layers in list_of_pdf_layers[1:]: # Since some layers (dropout) are shared, we have to treat them separately if type(layers) is list: @@ -789,9 +784,12 @@ def initializer_generator(seed, i_layer): else: pdfs = [layers(x) for x in pdfs] - models = [ - MetaModel({'NN_input': x_input}, pdf, name=f"NN_{i_replica}") + # Wrap the pdfs in a MetaModel to enable getting/setting of weights later + pdfs = [ + MetaModel({'NN_input': x_input}, pdf, name=f"{NN_PREFIX}_{i_replica}")(x_input) for i_replica, pdf in enumerate(pdfs) ] + pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name=f"stack_replicas")(pdfs) + model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS) - return models + return model diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 99e9f013db..acbcc8b3cd 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -17,6 +17,7 @@ from n3fit import model_gen from n3fit.backends import MetaModel, callbacks, clear_backend_state from n3fit.backends import operations as op +from n3fit.backends import NN_LAYER_ALL_REPLICAS import n3fit.hyper_optimization.penalties import n3fit.hyper_optimization.rewards from n3fit.scaler import generate_scaler @@ -454,7 +455,7 @@ def _model_generation(self, xinput, pdf_model, partition, partition_idx): training.summary() pdf_model = training.get_layer("PDFs") pdf_model.summary() - nn_model = pdf_model.get_layer("NN_0") + nn_model = pdf_model.get_layer(NN_LAYER_ALL_REPLICAS) nn_model.summary() # We may have fits without sumrules imposed try: diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5 index 2c5b02e71e..7f9f930184 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ diff --git a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5 index e5a8adea86..51061a63f2 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ diff --git a/n3fit/src/n3fit/tests/test_modelgen.py b/n3fit/src/n3fit/tests/test_modelgen.py index d50b98728a..ffdaa254b6 100644 --- a/n3fit/src/n3fit/tests/test_modelgen.py +++ b/n3fit/src/n3fit/tests/test_modelgen.py @@ -5,6 +5,7 @@ It checks that both the number of layers and the shape of the weights of the layers are what is expected """ +from n3fit.backends import NN_PREFIX from n3fit.model_gen import generate_nn INSIZE = 16 @@ -25,7 +26,7 @@ def test_generate_dense_network(): - nn = generate_nn("dense", **COMMON_ARGS)[0] + nn = generate_nn("dense", **COMMON_ARGS).get_layer(f"{NN_PREFIX}_0") # The number of layers should be input layer + len(OUT_SIZES) assert len(nn.layers) == len(OUT_SIZES) + 1 @@ -40,7 +41,7 @@ def test_generate_dense_network(): def test_generate_dense_per_flavour_network(): - nn = generate_nn("dense_per_flavour", **COMMON_ARGS)[0] + nn = generate_nn("dense_per_flavour", **COMMON_ARGS).get_layer(f"{NN_PREFIX}_0") # The number of layers should be input + BASIS_SIZE*len(OUT_SIZES) + concatenate assert len(nn.layers) == BASIS_SIZE * len(OUT_SIZES) + 2 diff --git a/n3fit/src/n3fit/tests/test_preprocessing.py b/n3fit/src/n3fit/tests/test_preprocessing.py index 42b020bdd4..1ec48cef9d 100644 --- a/n3fit/src/n3fit/tests/test_preprocessing.py +++ b/n3fit/src/n3fit/tests/test_preprocessing.py @@ -22,45 +22,47 @@ def test_preprocessing(): test_prefactors = [ [ [ - 3.7446213e-01, - 1.9785003e-01, - 2.7931085e-01, - 2.0784079e-01, - 4.5369801e-01, - 2.7796263e-01, - 5.4610312e-01, - 2.4907256e-02, - ], - [ - 6.2252983e-04, - 3.0504008e-05, - 4.5713778e-03, - 1.0905267e-03, - 4.0506415e-02, - 5.9004971e-05, - 4.5114113e-03, - 2.6757403e-09, - ], - [ - 4.1631009e-02, - 1.0586979e-02, - 8.3202787e-02, - 4.3506064e-02, - 2.2559988e-01, - 1.5161950e-02, - 1.0105091e-01, - 1.4808348e-04, - ], - [ - 1.1616933e-01, - 4.2717375e-02, - 1.5620175e-01, - 9.7478621e-02, - 3.2600221e-01, - 5.8901049e-02, - 2.1937098e-01, - 1.8343410e-03, - ], + [ + 3.7446213e-01, + 1.9785003e-01, + 2.7931085e-01, + 2.0784079e-01, + 4.5369801e-01, + 2.7796263e-01, + 5.4610312e-01, + 2.4907256e-02, + ], + [ + 6.2252983e-04, + 3.0504008e-05, + 4.5713778e-03, + 1.0905267e-03, + 4.0506415e-02, + 5.9004971e-05, + 4.5114113e-03, + 2.6757403e-09, + ], + [ + 4.1631009e-02, + 1.0586979e-02, + 8.3202787e-02, + 4.3506064e-02, + 2.2559988e-01, + 1.5161950e-02, + 1.0105091e-01, + 1.4808348e-04, + ], + [ + 1.1616933e-01, + 4.2717375e-02, + 1.5620175e-01, + 9.7478621e-02, + 3.2600221e-01, + 5.8901049e-02, + 2.1937098e-01, + 1.8343410e-03, + ], + ] ] ] prefactors = prepro(test_x) diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index 7e2e754e0f..12e5aca374 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -24,6 +24,7 @@ import numpy as np import numpy.linalg as la +from n3fit.backends import PREPROCESSING_LAYER_ALL_REPLICAS from validphys.arclength import arc_lengths, integrability_number from validphys.core import PDF, MCStats from validphys.lhapdfset import LHAPDFSet @@ -224,13 +225,7 @@ def get_preprocessing_factors(self, replica=None): if replica is None: replica = 1 # Replicas start counting in 1 so: - preprocessing_layers = self._models[replica - 1].get_layer_re(r"preprocessing_factor_\d") - if len(preprocessing_layers) > 1: - # We really don't want to fail at this point, but print a warning at least... - log.warning("More than one preprocessing layer found within the model!") - elif len(preprocessing_layers) < 1: - log.warning("No preprocessing layer found within the model!") - preprocessing_layer = preprocessing_layers[0] + preprocessing_layer = self._models[replica - 1].get_layer(PREPROCESSING_LAYER_ALL_REPLICAS) alphas_and_betas = None if self.fit_basis is not None: