From acfda7ff0ae6a5b32667cbdbfcf2b1865492ab75 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 15 Dec 2020 16:27:15 +0100 Subject: [PATCH 01/27] fit many models at once --- n3fit/src/n3fit/ModelTrainer.py | 45 +++--- .../n3fit/backends/keras_backend/losses.py | 4 +- .../backends/keras_backend/operations.py | 14 +- n3fit/src/n3fit/layers/DY.py | 2 +- n3fit/src/n3fit/layers/Rotations.py | 4 +- n3fit/src/n3fit/model_gen.py | 136 +++++++++--------- n3fit/src/n3fit/msr.py | 5 +- n3fit/src/n3fit/performfit.py | 58 +++++--- 8 files changed, 152 insertions(+), 116 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index 1fa73109b8..c7f4279850 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -177,8 +177,9 @@ def __init__( save_weights_each=False, kfold_parameters=None, max_cores=None, - model_file=None, + model_file=None, sum_rules=True, + parallel_models=1 ): """ Parameters @@ -213,6 +214,7 @@ def __init__( self.debug = debug self.save_weights_each = save_weights_each self.all_datasets = [] + self.parallel_models = parallel_models # Initialise internal variables which define behaviour if debug: @@ -359,7 +361,7 @@ def _fill_the_dictionaries(self): self.training["expdata"].append(integ_dict["expdata"]) self.training["integdatasets"].append(integ_dict["name"]) - def _model_generation(self, pdf_model, partition): + def _model_generation(self, pdf_models, partition): """ Fills the three dictionaries (``training``, ``validation``, ``experimental``) with the ``model`` entry @@ -398,13 +400,23 @@ def _model_generation(self, pdf_model, partition): input_arr = np.concatenate(self.input_list, axis=1) input_layer = operations.numpy_to_input(input_arr.T) - # The input to the full model is expected to be the input to the PDF - # by reutilizing `pdf_model.parse_input` we ensure any auxiliary input is also accunted fro - full_model_input_dict = pdf_model._parse_input([input_layer], pass_content=False) + # The trainable part of the model is a concatenation of all PDF models + # where each model corresponds to a different replica + all_replicas_pdf = [] + + for pdf_model in pdf_models: + # The input to the full model is expected to be the input to the PDF + # by reutilizing `pdf_model.parse_input` we ensure any auxiliary input is also accunted fro + full_model_input_dict = pdf_model._parse_input([input_layer], pass_content=False) + + # The output of the pdf on input_layer will be thus a concatenation + # of the PDF values for all experiments + full_pdf = pdf_model.apply_as_layer([input_layer]) + + all_replicas_pdf.append(full_pdf) + + full_pdf_per_replica = operations.stack(all_replicas_pdf, axis=-1) - # The output of the pdf on input_layer will be thus a concatenation - # of the PDF values for all experiments - full_pdf = pdf_model.apply_as_layer([input_layer]) # The input layer is a concatenation of all experiments # we need now to split the output on a different array per experiment sp_ar = [self.input_sizes] @@ -412,7 +424,7 @@ def _model_generation(self, pdf_model, partition): splitting_layer = operations.as_layer( operations.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split" ) - splitted_pdf = splitting_layer(full_pdf) + splitted_pdf = splitting_layer(full_pdf_per_replica) # If we are in a kfolding partition, select which datasets are out if partition: @@ -610,11 +622,11 @@ def _generate_pdf( pdf_model: MetaModel pdf model """ - log.info("Generating PDF model") + log.info("Generating PDF models") # Set the parameters of the NN # Generate the NN layers - pdf_model = model_gen.pdfNN_layer_generator( + pdf_models = model_gen.pdfNN_layer_generator( nodes=nodes_per_layer, activations=activation_per_layer, layer_type=layer_type, @@ -626,8 +638,9 @@ def _generate_pdf( regularizer=regularizer, regularizer_args=regularizer_args, impose_sumrule=self.impose_sumrule, + parallel_models=self.parallel_models ) - return pdf_model + return pdf_models def _assign_data(self, models, fold_k=0): """Assign to each model the data to compare with as well as the @@ -814,7 +827,7 @@ def hyperparametrizable(self, params): seed = np.random.randint(0, pow(2, 31)) # Generate the pdf model - pdf_model = self._generate_pdf( + pdf_models = self._generate_pdf( params["nodes_per_layer"], params["activation_per_layer"], params["initializer"], @@ -827,7 +840,7 @@ def hyperparametrizable(self, params): # Model generation joins all the different observable layers # together with pdf model generated above - models = self._model_generation(pdf_model, partition) + models = self._model_generation(pdf_models, partition) # Only after model generation, apply possible weight file if self.model_file: @@ -860,7 +873,7 @@ def hyperparametrizable(self, params): stopping_object = Stopping( validation_model, reporting, - pdf_model, + pdf_models[0], # TODO total_epochs=epochs, stopping_patience=stopping_epochs, save_weights_each=self.save_weights_each, @@ -934,7 +947,7 @@ def hyperparametrizable(self, params): dict_out["stopping_object"] = stopping_object dict_out["experimental"] = self.experimental dict_out["training"] = self.training - dict_out["pdf_model"] = pdf_model + dict_out["pdf_models"] = pdf_models # Only after the training has finished, we save all models for future reporting self.model_dicts = model_dicts diff --git a/n3fit/src/n3fit/backends/keras_backend/losses.py b/n3fit/src/n3fit/backends/keras_backend/losses.py index c4b427dd68..3bb23ce329 100644 --- a/n3fit/src/n3fit/backends/keras_backend/losses.py +++ b/n3fit/src/n3fit/backends/keras_backend/losses.py @@ -16,9 +16,7 @@ def l_invcovmat(invcovmat_np): def true_loss(y_true, y_pred): # (yt - yp) * covmat * (yt - yp) tmp = y_true - y_pred - right_dot = tf.tensordot(invcovmat, K.transpose(tmp), axes=1) - res = tf.tensordot(tmp, right_dot, axes=1) - return tf.reshape(res, (-1,)) + return tf.einsum('bri,ij,brj->b', tmp, invcovmat, tmp) return true_loss diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 5bad7ef570..847168ae38 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -234,6 +234,13 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None): return concatenated_tensor +def stack(tensor_list, axis=0, **kwargs): + """ Stack a list of tensors + see full `docs `_ + """ + return tf.stack(tensor_list, axis=axis, **kwargs) + + # Mathematical operations def pdf_masked_convolution(raw_pdf, basis_mask): """ Computes a masked convolution of two equal pdfs @@ -254,11 +261,8 @@ def pdf_masked_convolution(raw_pdf, basis_mask): rank3 (len(mask_true), xgrid, xgrid) """ pdf = tf.squeeze(raw_pdf, axis=0) # remove the batchsize - luminosity = tensor_product(pdf, pdf, axes=0) - # (xgrid, flavour, xgrid, flavour) - # reshape to put the flavour indices at the beginning to apply mask - lumi_tmp = K.permute_dimensions(luminosity, (3, 1, 2, 0)) - pdf_x_pdf = boolean_mask(lumi_tmp, basis_mask) + luminosity = tf.einsum('air,bjr->jibar', pdf, pdf) + pdf_x_pdf = boolean_mask(luminosity, basis_mask) return pdf_x_pdf diff --git a/n3fit/src/n3fit/layers/DY.py b/n3fit/src/n3fit/layers/DY.py index 0cacff7897..3c70d052f3 100644 --- a/n3fit/src/n3fit/layers/DY.py +++ b/n3fit/src/n3fit/layers/DY.py @@ -60,5 +60,5 @@ def call(self, pdf_raw): results.append(res) # the masked convolution removes the batch dimension - ret = self.operation(results) + ret = op.transpose(self.operation(results)) return op.batchit(ret) diff --git a/n3fit/src/n3fit/layers/Rotations.py b/n3fit/src/n3fit/layers/Rotations.py index 0933d0d0d0..0ccca32c0b 100644 --- a/n3fit/src/n3fit/layers/Rotations.py +++ b/n3fit/src/n3fit/layers/Rotations.py @@ -64,9 +64,9 @@ class FkRotation(MetaLayer): # TODO: Generate a rotation matrix in the input and just do tf.tensordot in call # the matrix should be: (8, 14) so that we can just do tf.tensordot(pdf, rotmat, axes=1) # i.e., create the matrix and inherit from the Rotation layer above - def __init__(self, output_dim=14, **kwargs): + def __init__(self, output_dim=14, name="evolution", **kwargs): self.output_dim = output_dim - super().__init__(**kwargs, name="evolution") + super().__init__(name, **kwargs) def call(self, pdf_raw): # Transpose the PDF so that the flavour index is the first one diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 03857e4fbd..55af577085 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -131,7 +131,7 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) # Prepare a concatenation as experiments are one single entity formed by many datasets def gen_concat(name): - return operations.as_layer(operations.concatenate, op_kwargs={"axis": 1}, name=name) + return operations.as_layer(operations.concatenate, op_kwargs={"axis": 2}, name=name) # Tensorflow operations have ugly name, # we want the final observables to be named just {spec_name} (with'val/exp' if needed) @@ -360,6 +360,7 @@ def pdfNN_layer_generator( regularizer=None, regularizer_args=None, impose_sumrule=False, + parallel_models=1, ): # pylint: disable=too-many-locals """ Generates the PDF model which takes as input a point in x (from 0 to 1) @@ -442,6 +443,7 @@ def pdfNN_layer_generator( model_pdf: n3fit.backends.MetaModel a model f(x) = y where x is a tensor (1, xgrid, 1) and y a tensor (1, xgrid, out) """ + # Parse the input configuration if nodes is None: nodes = [15, 8] ln = len(nodes) @@ -462,84 +464,90 @@ def pdfNN_layer_generator( raise ValueError( "Number of activation functions does not match number of layers @ model_gen.py" ) - # The number of nodes in the last layer is equal to the number of fitted flavours (== len(flav_info)) last_layer_nodes = nodes[-1] - if layer_type == "dense": - reg = regularizer_selector(regularizer, **regularizer_args) - list_of_pdf_layers = generate_dense_network( - inp, - nodes, - activations, - initializer_name, - seed=seed, - dropout_rate=dropout, - regularizer=reg, - ) - elif layer_type == "dense_per_flavour": - # Define the basis size attending to the last layer in the network - # TODO: this information should come from the basis information - # once the basis information is passed to this class - list_of_pdf_layers = generate_dense_per_flavour_network( - inp, nodes, activations, initializer_name, seed=seed, basis_size=last_layer_nodes, - ) + # Generate the generic layers + + # Prepare the input for the PDF model + placeholder_input = Input(shape=(None, 1), batch_size=1) # If the input is of type (x, logx) # create a x --> (x, logx) layer to preppend to everything if inp == 2: add_log = Lambda(lambda x: operations.concatenate([x, operations.op_log(x)], axis=-1)) - def dense_me(x): - """Takes an input tensor `x` and applies all layers - from the `list_of_pdf_layers` in order""" - if inp == 1: - curr_fun = list_of_pdf_layers[0](x) - else: - curr_fun = list_of_pdf_layers[0](add_log(x)) - - for dense_layer in list_of_pdf_layers[1:]: - curr_fun = dense_layer(curr_fun) - return curr_fun - - # Preprocessing layer (will be multiplied to the last of the denses) - preproseed = seed + number_of_layers - layer_preproc = Preprocessing( - input_shape=(1,), - name="pdf_prepro", - flav_info=flav_info, - seed=preproseed, - output_dim=last_layer_nodes - ) + # Evolution layer + layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out) + # Basis rotation basis_rotation = FlavourToEvolution(flav_info=flav_info, fitbasis=fitbasis) - # Evolution layer - layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out) + integrator_input = None # TODO + pdf_models = [] + + # Now we need a trainable network per model to be trained in parallel + for i in range(parallel_models): + layer_seed = seed + i*number_of_layers + if layer_type == "dense": + reg = regularizer_selector(regularizer, **regularizer_args) + list_of_pdf_layers = generate_dense_network( + inp, + nodes, + activations, + initializer_name, + seed=seed, + dropout_rate=dropout, + regularizer=reg, + ) + elif layer_type == "dense_per_flavour": + # Define the basis size attending to the last layer in the network + # TODO: this information should come from the basis information + # once the basis information is passed to this class + list_of_pdf_layers = generate_dense_per_flavour_network( + inp, nodes, activations, initializer_name, seed=layer_seed, basis_size=last_layer_nodes, + ) - # Apply preprocessing and basis - def layer_fitbasis(x): - ret = operations.op_multiply([dense_me(x), layer_preproc(x)]) - if basis_rotation.is_identity(): - # if we don't need to rotate basis we don't want spurious layers - return ret - return basis_rotation(ret) + def dense_me(x): + """Takes an input tensor `x` and applies all layers + from the `list_of_pdf_layers` in order""" + if inp == 1: + curr_fun = list_of_pdf_layers[0](x) + else: + curr_fun = list_of_pdf_layers[0](add_log(x)) - # Rotation layer, changes from the 8-basis to the 14-basis - def layer_pdf(x): - return layer_evln(layer_fitbasis(x)) + for dense_layer in list_of_pdf_layers[1:]: + curr_fun = dense_layer(curr_fun) + return curr_fun - # Prepare the input for the PDF model - placeholder_input = Input(shape=(None, 1), batch_size=1) + # Preprocessing layer (will be multiplied to the last of the denses) + preproseed = seed + number_of_layers*(i+1) + layer_preproc = Preprocessing( + input_shape=(1,), name=f"pdf_prepro_{i}", flav_info=flav_info, seed=preproseed + ) - # Impose sumrule if necessary - if impose_sumrule: - layer_pdf, integrator_input = msr_constraints.msr_impose(layer_fitbasis, layer_pdf, mode=impose_sumrule) - model_input = [integrator_input, placeholder_input] - else: - integrator_input = None - model_input = [placeholder_input] + # Apply preprocessing and basis + def layer_fitbasis(x): + ret = operations.op_multiply([dense_me(x), layer_preproc(x)]) + if basis_rotation.is_identity(): + # if we don't need to rotate basis we don't want spurious layers + return ret + return basis_rotation(ret) + + # Rotation layer, changes from the 8-basis to the 14-basis + def layer_pdf(x): + return layer_evln(layer_fitbasis(x)) + + # Impose sumrule if necessary #TODO still a lot of repetition going on inside the MSR, but not important -for now- + if impose_sumrule: + layer_pdf, integrator_input = msr_constraints.msr_impose(layer_fitbasis, layer_pdf, mode=impose_sumrule) + model_input = [integrator_input, placeholder_input] + else: + integrator_input = None + model_input = [placeholder_input] + + pdf_model = MetaModel(model_input, layer_pdf(placeholder_input), name=f"PDF_{i}") - pdf_model = MetaModel(model_input, layer_pdf(placeholder_input), name="PDF") + pdf_models.append(pdf_model) - return pdf_model + return pdf_models diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index 1f552888f1..e7d706131d 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -37,7 +37,7 @@ def gen_integration_input(nx): return xgrid, weights_array -def msr_impose(fit_layer, final_pdf_layer, mode='All', verbose=False): +def msr_impose(fit_layer, final_pdf_layer, mode='All', xgrid_input=None, verbose=False): """ This function receives: - fit_layer: the 8-basis layer of PDF which we fit @@ -64,7 +64,8 @@ def pdf_integrand(x): normalizer = MSR_Normalization(input_shape=(8,), mode=mode) # 5. Make the xgrid numpy array into a backend input layer so it can be given - xgrid_input = operations.numpy_to_input(xgrid) + if xgrid_input is None: + xgrid_input = operations.numpy_to_input(xgrid) normalization = normalizer(integrator(pdf_integrand(xgrid_input))) def ultimate_pdf(x): diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index b12e6359ea..607d23f163 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -92,6 +92,7 @@ def performfit( hyperopt=None, debug=False, maxcores=None, + parallel_models=1, ): """ This action will (upon having read a validcard) process a full PDF fit for a given replica. @@ -243,7 +244,11 @@ def performfit( kfold_parameters=kfold_parameters, max_cores=maxcores, model_file=fitting.get("load"), +<<<<<<< HEAD sum_rules=fitting.get("sum_rules", True) +======= + parallel_models=parallel_models +>>>>>>> 6e0fc5f2c (fit many models at once) ) # This is just to give a descriptive name to the fit function @@ -297,7 +302,7 @@ def performfit( # After the fit is run we get a 'result' dictionary with the following items: stopping_object = result["stopping_object"] - pdf_model = result["pdf_model"] + pdf_models = result["pdf_models"] true_chi2 = result["loss"] training = result["training"] log.info("Total exp chi2: %s", true_chi2) @@ -317,31 +322,38 @@ def performfit( ) ) - # Create a pdf instance - pdf_instance = N3PDF(pdf_model, fit_basis=fitting.get("basis")) + final_time = stopwatch.stop() - # Generate the writer wrapper - writer_wrapper = WriterWrapper( - replica_number, - pdf_instance, - stopping_object, - theoryid.get_description().get("Q0") ** 2, - stopwatch.stop(), - ) + for i, pdf_model in enumerate(pdf_models): + # Each model goes into its own replica folder + replica_path_set = replica_path / f"replica_{replica_number + i}" - # Now write the data down - training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) - writer_wrapper.write_data( - replica_path_set, output_path.name, training_chi2, val_chi2, true_chi2 - ) + # Create a pdf instance + pdf_instance = N3PDF(pdf_model, fit_basis=fitting.get("basis")) + + # Generate the writer wrapper + writer_wrapper = WriterWrapper( + replica_number, + pdf_instance, + stopping_object, # TODO + theoryid.get_description().get("Q0") ** 2, + final_time, + ) + + # Now write the data down + # TODO: recompute training, valudation and experimental _per_ pdfmodel + training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) + writer_wrapper.write_data( + replica_path_set, output_path.name, training_chi2, val_chi2, true_chi2 + ) - # Save the weights to some file for the given replica - model_file = fitting.get("save") - if model_file: - model_file_path = replica_path_set / model_file - log.info(" > Saving the weights for future in %s", model_file_path) - # Need to use "str" here because TF 2.2 has a bug for paths objects (fixed in 2.3 though) - pdf_model.save_weights(str(model_file_path), save_format="h5") + # Save the weights to some file for the given replica + model_file = fitting.get("save") + if model_file: + model_file_path = replica_path_set / model_file + log.info(" > Saving the weights for future in %s", model_file_path) + # Need to use "str" here because TF 2.2 has a bug for paths objects (fixed in 2.3 though) + pdf_model.save_weights(str(model_file_path), save_format="h5") # If the history of weights is active then loop over it # rewind the state back to every step and write down the results From 15e50eec83c8711b542284235d43dafe19072c24 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 16 Dec 2020 11:48:47 +0100 Subject: [PATCH 02/27] avoid repetitions in the sum rules --- n3fit/src/n3fit/model_gen.py | 21 +++++++++-------- n3fit/src/n3fit/msr.py | 45 ++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 55af577085..9ab97a41db 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -482,7 +482,15 @@ def pdfNN_layer_generator( # Basis rotation basis_rotation = FlavourToEvolution(flav_info=flav_info, fitbasis=fitbasis) - integrator_input = None # TODO + # Normalization and sum rules + if impose_sumrule: + sumrule_imposition, integrator_input = msr_constraints.msr_impose(mode=impose_sumrule) + model_input = [integrator_input, placeholder_input] + else: + sumrule_imposition = lambda x: x + integrator_input = None + model_input = [placeholder_input] + pdf_models = [] # Now we need a trainable network per model to be trained in parallel @@ -538,15 +546,10 @@ def layer_fitbasis(x): def layer_pdf(x): return layer_evln(layer_fitbasis(x)) - # Impose sumrule if necessary #TODO still a lot of repetition going on inside the MSR, but not important -for now- - if impose_sumrule: - layer_pdf, integrator_input = msr_constraints.msr_impose(layer_fitbasis, layer_pdf, mode=impose_sumrule) - model_input = [integrator_input, placeholder_input] - else: - integrator_input = None - model_input = [placeholder_input] + # Final PDF + final_pdf = sumrule_imposition(layer_fitbasis, layer_pdf) - pdf_model = MetaModel(model_input, layer_pdf(placeholder_input), name=f"PDF_{i}") + pdf_model = MetaModel(model_input, final_pdf(placeholder_input), name=f"PDF_{i}") pdf_models.append(pdf_model) diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index e7d706131d..d009e26054 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -37,7 +37,7 @@ def gen_integration_input(nx): return xgrid, weights_array -def msr_impose(fit_layer, final_pdf_layer, mode='All', xgrid_input=None, verbose=False): +def msr_impose(nx=int(2e3), basis_size=8, mode='All'): """ This function receives: - fit_layer: the 8-basis layer of PDF which we fit @@ -46,43 +46,44 @@ def msr_impose(fit_layer, final_pdf_layer, mode='All', xgrid_input=None, verbose the final_pdf layer with a normalisation by which the sum rule is imposed """ # 1. Generate the fake input which will be used to integrate - nx = int(2e3) xgrid, weights_array = gen_integration_input(nx) # 2. Prepare the pdf for integration # for that we need to multiply several flavours with 1/x division_by_x = xDivide() - def pdf_integrand(x): - res = operations.op_multiply([division_by_x(x), fit_layer(x)]) - return res - # 3. Now create the integration layer (the layer that will simply integrate, given some weight integrator = xIntegrator(weights_array, input_shape=(nx,)) # 4. Now create the normalization by selecting the right integrations - normalizer = MSR_Normalization(input_shape=(8,), mode=mode) + normalizer = MSR_Normalization(input_shape=(basis_size,), mode=mode) - # 5. Make the xgrid numpy array into a backend input layer so it can be given - if xgrid_input is None: - xgrid_input = operations.numpy_to_input(xgrid) - normalization = normalizer(integrator(pdf_integrand(xgrid_input))) + # 5. Make the xgrid array into a backend input layer so it can be given to the normalization + xgrid_input = operations.numpy_to_input(xgrid) - def ultimate_pdf(x): - return operations.op_multiply_dim([final_pdf_layer(x), normalization]) + # Now parepare a function that takes as input the 8-flavours output of the NN + # and the 14-flavours after the fk rotation and returns a 14-flavours normalized output + # note + TODO: + # the idea was that the normalization should always be applied at the fktable 14-flavours + # and always computed at the output of the NN (in case one would like to compute it differently) + # don't think it is a good idea anymore and should be changed to act only on the output to the fktable + # but will be dealt with in the future. + # fitlayer #final_pdf + def apply_normalization(layer_fitbasis, layer_pdf): + """ + layer_fitbasis: output of the NN + layer_pdf: output for the fktable + """ - if verbose: - # only_int = integrator(pdf_integrand(xgrid_input)) - # modelito = MetaModel(xgrid_input, only_int) - # result = modelito.predict(x = None, steps = 1) + pdf_integrand = operations.op_multiply([division_by_x(xgrid_input), layer_fitbasis(xgrid_input)]) + normalization = normalizer(integrator(pdf_integrand)) - print(" > > Generating model for the inyection layer which imposes MSR") - check_integration(ultimate_pdf, xgrid_input) + def ultimate_pdf(x): + return operations.op_multiply([layer_pdf(x), normalization]) - # Save a reference to xgrid in ultimate_pdf, very useful for debugging - ultimate_pdf.ref_xgrid = xgrid_input + return ultimate_pdf - return ultimate_pdf, xgrid_input + return apply_normalization, xgrid_input def check_integration(ultimate_pdf, integration_input): From b629f0a2593cb7fd10dc7f33336a02d2e75082d0 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 16 Dec 2020 12:04:03 +0100 Subject: [PATCH 03/27] fix some outstanding issues --- n3fit/src/n3fit/ModelTrainer.py | 6 ++++-- n3fit/src/n3fit/backends/keras_backend/losses.py | 4 ++++ n3fit/src/n3fit/backends/keras_backend/operations.py | 2 +- n3fit/src/n3fit/layers/DY.py | 4 ++-- n3fit/src/n3fit/tests/test_backend.py | 3 ++- n3fit/src/n3fit/tests/test_fit.py | 1 + n3fit/src/n3fit/tests/test_layers.py | 3 ++- 7 files changed, 16 insertions(+), 7 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index c7f4279850..c578cef048 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -843,9 +843,11 @@ def hyperparametrizable(self, params): models = self._model_generation(pdf_models, partition) # Only after model generation, apply possible weight file + # TODO: not sure whether it is a good idea that all of them start at the same point if self.model_file: log.info("Applying model file %s", self.model_file) - pdf_model.load_weights(self.model_file) + for pdf_model in pdf_models: + pdf_model.load_weights(self.model_file) if k > 0: # Reset the positivity and integrability multipliers @@ -873,7 +875,7 @@ def hyperparametrizable(self, params): stopping_object = Stopping( validation_model, reporting, - pdf_models[0], # TODO + pdf_models[0], # TODO, not stopping for now total_epochs=epochs, stopping_patience=stopping_epochs, save_weights_each=self.save_weights_each, diff --git a/n3fit/src/n3fit/backends/keras_backend/losses.py b/n3fit/src/n3fit/backends/keras_backend/losses.py index 3bb23ce329..d3c8d0600d 100644 --- a/n3fit/src/n3fit/backends/keras_backend/losses.py +++ b/n3fit/src/n3fit/backends/keras_backend/losses.py @@ -14,6 +14,10 @@ def l_invcovmat(invcovmat_np): invcovmat = K.constant(invcovmat_np) def true_loss(y_true, y_pred): + """ + y_true: (1, N) + y_pred: (1, replicas, N) + """ # (yt - yp) * covmat * (yt - yp) tmp = y_true - y_pred return tf.einsum('bri,ij,brj->b', tmp, invcovmat, tmp) diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 847168ae38..3e7a85ed44 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -250,7 +250,7 @@ def pdf_masked_convolution(raw_pdf, basis_mask): Parameters ---------- pdf: tf.tensor - rank 3 (batchsize, xgrid, flavours) + rank 4 (batchsize, xgrid, flavours, replicas) basis_mask: tf.tensor rank 2 tensor (flavours, flavours) mask to apply to the pdf convolution diff --git a/n3fit/src/n3fit/layers/DY.py b/n3fit/src/n3fit/layers/DY.py index 3c70d052f3..5d714731ea 100644 --- a/n3fit/src/n3fit/layers/DY.py +++ b/n3fit/src/n3fit/layers/DY.py @@ -30,12 +30,12 @@ def call(self, pdf_raw): Parameters ---------- pdf_in: tensor - rank 3 tensor (batchsize, xgrid, flavours) + rank 4 tensor (batchsize, xgrid, flavours, replicas) Returns ------- results: tensor - rank 2 tensor (batchsize, ndata) + rank 3 tensor (batchsize, replicas, ndata) """ # Hadronic observables might need splitting of the input pdf in the x dimension # so we have 3 different paths for this layer diff --git a/n3fit/src/n3fit/tests/test_backend.py b/n3fit/src/n3fit/tests/test_backend.py index 2693fd435a..1487e08271 100644 --- a/n3fit/src/n3fit/tests/test_backend.py +++ b/n3fit/src/n3fit/tests/test_backend.py @@ -122,7 +122,8 @@ def test_sum(): # Tests loss functions def test_l_invcovmat(): loss_f = losses.l_invcovmat(INVCOVMAT) - result = loss_f(T1, T2) + # Add a replica and batch dimension to T2 + result = loss_f(T1, np.expand_dims(T2, [0,1])) y = ARR1 - ARR2 tmp = np.dot(INVCOVMAT, y) reference = np.dot(y, tmp) diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index adfb4809a8..2c3f509405 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -138,6 +138,7 @@ def test_performfit_and_timing(tmp_path): auxiliary_performfit(tmp_path, replica=2, timing=True) +@pytest.mark.skip(reason="Still not implemented in parallel mode") def test_hyperopt(tmp_path): # Prepare the run quickcard = f"hyper-{QUICKNAME}.yml" diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py index 8ea84457da..d27616b830 100644 --- a/n3fit/src/n3fit/tests/test_layers.py +++ b/n3fit/src/n3fit/tests/test_layers.py @@ -163,7 +163,8 @@ def test_DY(): fks = [i['fktable'] for i in fkdicts] obs_layer = layers.DY(fkdicts, fks, ope, nfl=FLAVS) pdf = np.random.rand(XSIZE, FLAVS) - kp = op.numpy_to_tensor(np.expand_dims(pdf, 0)) + # Add batch dimension (0) and replica dimension (-1) + kp = op.numpy_to_tensor(np.expand_dims(pdf, [0,-1])) # generate the n3fit results result_tensor = obs_layer(kp) result = op.evaluate(result_tensor) From edf697ace47c2fbf0f12b131cebe34ad1c7a4d10 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 17 Dec 2020 18:58:49 +0100 Subject: [PATCH 04/27] promote the losses to their own layers as outputs of predict --- .../n3fit/backends/keras_backend/MetaModel.py | 5 +- .../n3fit/backends/keras_backend/losses.py | 76 ------------------- .../backends/keras_backend/operations.py | 20 +++++ n3fit/src/n3fit/layers/losses.py | 52 +++++++++++++ n3fit/src/n3fit/model_gen.py | 49 ++++++------ 5 files changed, 97 insertions(+), 105 deletions(-) delete mode 100644 n3fit/src/n3fit/backends/keras_backend/losses.py create mode 100644 n3fit/src/n3fit/layers/losses.py diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index fe17eaec99..d6733de8d0 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -305,10 +305,7 @@ def make_test_function(self): @tf.function def eval_fun(*args): predictions = self(self._parse_input(None)) - # Concatenate the output to split them again as a list - ypred = tf.concat(predictions, axis=-1) - predspl = tf.split(ypred, lens, axis=-1) - loss_list = [lfun(target, pred) for target, pred, lfun in zip(tt, predspl, self.loss)] + loss_list = [lfun(target, pred) for target, pred, lfun in zip(tt, predictions, self.loss)] ret = [tf.reduce_sum(loss_list)] + loss_list return dict(zip(out_names, ret)) diff --git a/n3fit/src/n3fit/backends/keras_backend/losses.py b/n3fit/src/n3fit/backends/keras_backend/losses.py deleted file mode 100644 index d3c8d0600d..0000000000 --- a/n3fit/src/n3fit/backends/keras_backend/losses.py +++ /dev/null @@ -1,76 +0,0 @@ -""" - Module containing a list of loss functions availables to the fitting code -""" - -import tensorflow as tf -from tensorflow.keras import backend as K - - -def l_invcovmat(invcovmat_np): - """ - Returns a loss function such that: - L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} - """ - invcovmat = K.constant(invcovmat_np) - - def true_loss(y_true, y_pred): - """ - y_true: (1, N) - y_pred: (1, replicas, N) - """ - # (yt - yp) * covmat * (yt - yp) - tmp = y_true - y_pred - return tf.einsum('bri,ij,brj->b', tmp, invcovmat, tmp) - - return true_loss - - -def l_positivity(alpha=1e-7): - """ - Returns L = elu(y_pred) (considers y_true as 0) - - The positivity loss is computed by inverting the sign of the - datapoints and then applying the elu function, this function is - f(x) = x if x > 0 - f(x) = alpha * (e^{x} - 1) if x < 0 - This is done to avoid a big discontinuity in the derivative at 0 when - the lagrange multiplier is very big. - In practice this function can produce results in the range (-alpha, inf) - """ - - def true_loss(y_true, y_pred): - y = -y_pred - loss = K.elu(y, alpha=alpha) - res = K.sum(loss) - return tf.reshape(res, (-1,)) - - return true_loss - - -def l_integrability(): - """ - Returns (y_pred)*(y_pred) - """ - - def true_loss(y_true, y_pred): - loss = K.square(y_pred) - res = K.sum(loss, keepdims=True) - return tf.reshape(res, (-1,)) - - return true_loss - - -def l_diaginvcovmat(diaginvcovmat_np): - """ - Returns a loss function such that: - L = sum_{i} (yt - yp)_{i} invcovmat_{ii} (yt - yp)_{i} - diaginvcovmat_np should be 1d - """ - invcovmat = K.constant(diaginvcovmat_np) - - def true_loss(y_true, y_pred): - tmp = y_true - y_pred - res = tf.tensordot(invcovmat, K.transpose(tmp * tmp), axes=1) - return tf.reshape(res, (-1,)) - - return true_loss diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 3e7a85ed44..35f6a632fb 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -266,6 +266,7 @@ def pdf_masked_convolution(raw_pdf, basis_mask): return pdf_x_pdf + def tensor_product(*args, **kwargs): """ Computes the tensordot product between tensor_x and tensor_y @@ -274,6 +275,14 @@ def tensor_product(*args, **kwargs): return tf.tensordot(*args, **kwargs) +def einsum(equation, *args, **kwargs): + """ + Computes the tensor product using einsum + See full `docs `_ + """ + return tf.einsum(equation, *args, **kwargs) + + @tf.function(experimental_relax_shapes=True) def op_log(o_tensor, **kwargs): @@ -299,6 +308,7 @@ def split(*args, **kwargs): """ return tf.split(*args, **kwargs) + def scatter_to_one(values, indices=[[1]], output_dim=14): """ Like scatter_nd initialized to one instead of zero @@ -306,3 +316,13 @@ def scatter_to_one(values, indices=[[1]], output_dim=14): """ ones = np.ones(output_dim, dtype=np.float32) return tf.tensor_scatter_nd_update(ones, indices, values) + + +@tf.function +def backend_function(fun_name, *args, **kwargs): + """ + Calls the (``fun_name``) backend function + see full `docs `_ for some possibilities + """ + fun = getattr(K, fun_name) + return fun(*args, **kwargs) diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py new file mode 100644 index 0000000000..f56dc0f461 --- /dev/null +++ b/n3fit/src/n3fit/layers/losses.py @@ -0,0 +1,52 @@ +""" + Module containg the losses to be apply to the models as layers +""" + +from n3fit.backends import MetaLayer +from n3fit.backends import operations as op + +class L_invcovmat(MetaLayer): + """ + Loss function such that: + L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} + """ + def __init__(self, invcovmat, y_true, **kwargs): + self.invcovmat = op.numpy_to_tensor(invcovmat) + self.y_true = op.numpy_to_tensor(y_true) + super().__init__(**kwargs) + + def call(self, y_pred, **kwargs): + tmp = self.y_true - y_pred + res = op.einsum('bri, ij, brj -> r', tmp, self.invcovmat, tmp) + return res + +class L_positivity(MetaLayer): + """ + Returns L = elu(y_pred) (considers y_true as 0) + + The positivity loss is computed by inverting the sign of the + datapoints and then applying the elu function, this function is + f(x) = x if x > 0 + f(x) = alpha * (e^{x} - 1) if x < 0 + This is done to avoid a big discontinuity in the derivative at 0 when + the lagrange multiplier is very big. + In practice this function can produce results in the range (-alpha, inf) + """ + def __init__(self, alpha=1e-7, **kwargs): + self.alpha = alpha + super().__init__(**kwargs) + + def call(self, y_pred, **kwargs): + y = -y_pred + loss = op.backend_function("elu", y, alpha=self.alpha) + # Sum over the batch and the datapoints + return op.sum(loss, axis=[0,-1]) + +class L_integrability(MetaLayer): + """ + Returns L = (y_pred)*(y_pred) + """ + def call(self, y_pred, **kwargs): + y = op.backend_function("square", y_pred) + # Sum over the batch and the datapoints + return op.sum(y, axis=[0,-1]) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 9ab97a41db..9a075d548c 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -8,12 +8,11 @@ Generates the PDF NN layer to be fitted """ import n3fit.msr as msr_constraints -from n3fit.layers import DIS, DY, Mask, ObsRotation +from n3fit.layers import DIS, DY, Mask, ObsRotation, losses from n3fit.layers import Preprocessing, FkRotation, FlavourToEvolution from n3fit.backends import MetaModel, Input from n3fit.backends import operations -from n3fit.backends import losses from n3fit.backends import MetaLayer, Lambda from n3fit.backends import base_layer_selector, regularizer_selector @@ -130,19 +129,20 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) model_obs_ex.append(obs_layer_ex) # Prepare a concatenation as experiments are one single entity formed by many datasets - def gen_concat(name): - return operations.as_layer(operations.concatenate, op_kwargs={"axis": 2}, name=name) + def gen_concat(): + return operations.as_layer(operations.concatenate, op_kwargs={"axis": 2}) # Tensorflow operations have ugly name, # we want the final observables to be named just {spec_name} (with'val/exp' if needed) tr_name = spec_name vl_name = f"{spec_name}_val" ex_name = f"{spec_name}_exp" - concat_ex = gen_concat(ex_name) + + concat_ex = gen_concat() # For data transformation all concatenations are the same if spec_dict.get("data_transformation") is None: - concat_tr = gen_concat(tr_name) - concat_vl = gen_concat(vl_name) + concat_tr = gen_concat() + concat_vl = gen_concat() else: concat_tr = concat_ex concat_vl = concat_ex @@ -177,22 +177,22 @@ def experiment_layer(pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=Non # Now create the model for this experiment full_nx = sum(dataset_xsizes) + loss = lambda x,y: operations.sum(y) if spec_dict["positivity"]: out_mask = Mask( c=positivity_initial, axis=1, - name=spec_name, ) - def out_positivity(pdf_layer, datasets_out=None): - exp_result = experiment_layer(pdf_layer) - return out_mask(exp_result) - if integrability: - loss = losses.l_integrability() + loss_pos = losses.L_integrability(name=spec_name) else: - loss = losses.l_positivity() + loss_pos = losses.L_positivity(name=spec_name) + + def out_positivity(pdf_layer, datasets_out=None): + exp_result = experiment_layer(pdf_layer) + return loss_pos(out_mask(exp_result)) layer_info = { "inputs": model_inputs, @@ -206,42 +206,41 @@ def out_positivity(pdf_layer, datasets_out=None): invcovmat_vl = spec_dict["invcovmat_vl"] invcovmat = spec_dict["invcovmat_true"] + # Prepare the loss function + loss_tr = losses.L_invcovmat(invcovmat_tr, spec_dict["expdata"], name=tr_name) + loss_vl = losses.L_invcovmat(invcovmat_vl, spec_dict["expdata_vl"], name=vl_name) + loss_ex = losses.L_invcovmat(invcovmat, spec_dict["expdata_true"], name=ex_name) + # Generate the loss function and rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: + # TODO: I'm asuming that the diagonal covmat will work ootb, check # The rotation is the last layer so it should carry The Name obsrot_tr = ObsRotation(spec_dict.get("data_transformation"), name=tr_name) obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl"), name=vl_name) - loss_tr = losses.l_diaginvcovmat(invcovmat_tr) - loss_vl = losses.l_diaginvcovmat(invcovmat_vl) else: obsrot_tr = None obsrot_vl = None - loss_tr = losses.l_invcovmat(invcovmat_tr) - # TODO At this point we need to intercept the data and compile the loss with it - # then the validation must have a list of None as an output - loss_vl = losses.l_invcovmat(invcovmat_vl) - loss = losses.l_invcovmat(invcovmat) def out_tr(pdf_layer, datasets_out=None): exp_result = experiment_layer( pdf_layer, model_obs=model_obs_tr, concat=concat_tr, datasets_out=datasets_out, rotation=obsrot_tr ) - return exp_result + return loss_tr(exp_result) def out_vl(pdf_layer, datasets_out=None): exp_result = experiment_layer( pdf_layer, model_obs=model_obs_vl, concat=concat_vl, datasets_out=datasets_out, rotation=obsrot_vl ) - return exp_result + return loss_vl(exp_result) layer_info = { "inputs": model_inputs, "output": experiment_layer, "loss": loss, "output_tr": out_tr, - "loss_tr": loss_tr, + "loss_tr": loss, "output_vl": out_vl, - "loss_vl": loss_vl, + "loss_vl": loss, "experiment_xsize": full_nx, } From a40f87257935250edc781b8fc082d5275ac55338 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 17 Dec 2020 19:51:18 +0100 Subject: [PATCH 05/27] added a test for the new losses --- n3fit/src/n3fit/tests/test_backend.py | 33 -------------------- n3fit/src/n3fit/tests/test_losses.py | 43 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 n3fit/src/n3fit/tests/test_losses.py diff --git a/n3fit/src/n3fit/tests/test_backend.py b/n3fit/src/n3fit/tests/test_backend.py index 1487e08271..64355a06f1 100644 --- a/n3fit/src/n3fit/tests/test_backend.py +++ b/n3fit/src/n3fit/tests/test_backend.py @@ -6,7 +6,6 @@ import functools import numpy as np from n3fit.backends import operations as op -from n3fit.backends import losses # General parameters DIM = 7 @@ -117,35 +116,3 @@ def test_tensor_product(): def test_sum(): numpy_check(op.sum, np.sum, mode='single') - - -# Tests loss functions -def test_l_invcovmat(): - loss_f = losses.l_invcovmat(INVCOVMAT) - # Add a replica and batch dimension to T2 - result = loss_f(T1, np.expand_dims(T2, [0,1])) - y = ARR1 - ARR2 - tmp = np.dot(INVCOVMAT, y) - reference = np.dot(y, tmp) - are_equal(result, reference) - - -def test_l_positivity(): - alpha = 1e-7 - loss_f = losses.l_positivity(alpha=alpha) - result = loss_f(0.0, T1) - - def elu_sum(yarr_in): - """ Applies Exponential Linear Unit - to an array and sums it up """ - yarr = -yarr_in - res = 0.0 - for y in yarr: - if y > 0: - res += y - else: - res += alpha * (np.exp(y) - 1) - return res - - reference = elu_sum(ARR1) - are_equal(result, reference) diff --git a/n3fit/src/n3fit/tests/test_losses.py b/n3fit/src/n3fit/tests/test_losses.py new file mode 100644 index 0000000000..44a51e9fc3 --- /dev/null +++ b/n3fit/src/n3fit/tests/test_losses.py @@ -0,0 +1,43 @@ +""" + Test the losses layers +""" +import numpy as np +from n3fit.layers import losses +from n3fit.backends import operations as op +from .test_backend import are_equal, DIM + +ARR1 = np.random.rand(DIM) +ARR2 = np.random.rand(DIM) +C = np.random.rand(DIM, DIM) +INVCOVMAT = np.linalg.inv(C @ C.T) + +# Tests loss functions +def test_l_invcovmat(): + loss_f = losses.L_invcovmat(INVCOVMAT, ARR1) + # Add a replica and batch dimension to T2 + result = loss_f(np.expand_dims(ARR2, [0, 1])) + y = ARR1 - ARR2 + tmp = np.dot(INVCOVMAT, y) + reference = np.dot(y, tmp) + are_equal(result, reference) + + +def test_l_positivity(): + alpha = 1e-7 + loss_f = losses.L_positivity(alpha=alpha) + result = loss_f(np.expand_dims(ARR2, [0, 1])) + + def elu_sum(yarr_in): + """Applies Exponential Linear Unit + to an array and sums it up""" + yarr = -yarr_in + res = 0.0 + for y in yarr: + if y > 0: + res += y + else: + res += alpha * (np.exp(y) - 1) + return res + + reference = elu_sum(ARR1) + are_equal(result, reference) From d20a9f3602405bcbf357c61e287f8d5abad352d9 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 17 Dec 2020 19:55:01 +0100 Subject: [PATCH 06/27] minor changes so the code continues to work --- n3fit/src/n3fit/ModelTrainer.py | 2 +- n3fit/src/n3fit/backends/__init__.py | 1 - .../n3fit/backends/keras_backend/MetaModel.py | 45 ++++++++++--------- n3fit/src/n3fit/model_gen.py | 5 ++- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index c578cef048..a38c324211 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -897,7 +897,7 @@ def hyperparametrizable(self, params): validation_loss = stopping_object.vl_chi2 # Compute experimental loss - exp_loss_raw = models["experimental"].compute_losses()["loss"] + exp_loss_raw = np.take(models["experimental"].compute_losses()["loss"], -1) experimental_loss = exp_loss_raw / model_dicts["experimental"]["ndata"] if self.mode_hyperopt: diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py index 3370ae75a8..726a752eda 100644 --- a/n3fit/src/n3fit/backends/__init__.py +++ b/n3fit/src/n3fit/backends/__init__.py @@ -12,7 +12,6 @@ regularizer_selector, Concatenate, ) -from n3fit.backends.keras_backend import losses from n3fit.backends.keras_backend import operations from n3fit.backends.keras_backend import constraints from n3fit.backends.keras_backend import callbacks diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index d6733de8d0..7d36fa6386 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -123,6 +123,7 @@ def __init__(self, input_tensors, output_tensors, **kwargs): self.all_outputs = output_list self.target_tensors = None self.eval_fun = None + self.compute_losses_function = None def _parse_input(self, extra_input=None, pass_content=True): """ Returns the input tensors the model was compiled with. @@ -169,34 +170,36 @@ def predict(self, x=None, **kwargs): def compute_losses(self): """ - This function is the fast-equivalent to the model ``evaluate(x,y)`` method. + This function is equivalent to the model ``evaluate(x,y)`` method of most TensorFlow models + which return a dictionary of losses per output layer. + The losses reported in the ``evaluate`` method for n3fit are, however, summed over replicas. + Instead the loss we are interested in is usually the output of the model (i.e., predict) - On first call it calls ``.evaluate(return_dict=True, verbose=0)`` to force - the initialization of the test function. - Subsequent calls of this method will (when applicable) - directly call the internal evaluation function ``eval_fun``. - This bypasses the pre- and post- evaluation steps, resulting in a ~10% speed up - with respect to ``.evaluate(...)`` + This function then generates a dictionary of partial losses of the model separated per replica. + i.e., the output for experiment {'LHC_exp'} will be an array of Nrep elements. Returns ------- dict a dictionary with all partial losses of the model """ - if self.eval_fun is None: - # We still need to perform some initialization - if LEGACY: - # For TF < 2.2 we need to generate the test_function ourselves - self.make_test_function() - else: - return self.evaluate(return_dict=True, verbose=False) - if LEGACY: - # For tF < 2.2 we need to force the output to be a float - ret = self.eval_fun() - ret['loss'] = ret['loss'].numpy() - return ret - else: - return self.eval_fun() + # TODO might not work for TF < 2.2, we might not care either + if self.compute_losses_function is None: + out_names = [f"{i}_loss" for i in self.output_names] + out_names.insert(0, "loss") + + # Compile a evaluation function + @tf.function + def losses_fun(): + predictions = self(self._parse_input(None)) + total_loss = tf.reduce_sum(predictions, axis=0) + ret = [total_loss] + predictions + return dict(zip(out_names, ret)) + + self.compute_losses_function = losses_fun + + return self.compute_losses_function() + def evaluate(self, x=None, y=None, **kwargs): """ diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 9a075d548c..113c148b65 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -233,9 +233,12 @@ def out_vl(pdf_layer, datasets_out=None): ) return loss_vl(exp_result) + def out_exp(pdf_layer, datasets_out=None): + return loss_ex(experiment_layer(pdf_layer)) + layer_info = { "inputs": model_inputs, - "output": experiment_layer, + "output": out_exp, "loss": loss, "output_tr": out_tr, "loss_tr": loss, From 353bd2a29514a787858e9579dd8dcc031a61d712 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 18 Dec 2020 13:04:10 +0100 Subject: [PATCH 07/27] keep track of the validation and positivity separately --- n3fit/src/n3fit/ModelTrainer.py | 2 +- n3fit/src/n3fit/io/writer.py | 24 +++-- n3fit/src/n3fit/stopping.py | 186 ++++++++++++++++++++++++-------- 3 files changed, 157 insertions(+), 55 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index a38c324211..60f8f52aac 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -875,7 +875,7 @@ def hyperparametrizable(self, params): stopping_object = Stopping( validation_model, reporting, - pdf_models[0], # TODO, not stopping for now + pdf_models, total_epochs=epochs, stopping_patience=stopping_epochs, save_weights_each=self.save_weights_each, diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 7ae1fb24a0..43552c0caf 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -69,6 +69,14 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # Check the directory exist, if it doesn't, generate it os.makedirs(replica_path_set, exist_ok=True) + # Get the replica status for this object + replica_status = self.stopping_object.get_next_replica() + stop_epoch = self.stopping_object.epoch_of_the_stop + # TODO, this is wrong, will be dealt with later + tr_chi2 = tr_chi2.tolist()[0] + true_chi2 = true_chi2.tolist() + vl_chi2 = vl_chi2.tolist()[0] + # export PDF grid to file storefit( self.pdf_object, @@ -84,7 +92,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): integrability_numbers, allchi2_lines, preproc_lines, - self.stopping_object.positivity_status(), + replica_status.positivity_status, self.timings, ) @@ -92,18 +100,18 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # export all metadata from the fit to a single yaml file output_file = f"{replica_path_set}/{fitname}.json" json_dict = jsonfit( - self.stopping_object, self.pdf_object, tr_chi2, vl_chi2, true_chi2, self.timings + replica_status, self.pdf_object, tr_chi2, true_chi2, stop_epoch, self.timings ) with open(output_file, "w") as fs: json.dump(json_dict, fs, indent=2) -def jsonfit(stopping_object, pdf_object, tr_chi2, vl_chi2, true_chi2, timing): +def jsonfit(replica_status, pdf_object, tr_chi2, true_chi2, epoch_stop, timing): """Generates a dictionary containing all relevant metadata for the fit Parameters ---------- - stopping_object: n3fit.stopping.Stopping + replica_status: n3fit.stopping.ReplicaBest a stopping.Validation object pdf_object: n3fit.vpinterface.N3PDF N3PDF object constructed from the pdf_model @@ -121,12 +129,12 @@ def jsonfit(stopping_object, pdf_object, tr_chi2, vl_chi2, true_chi2, timing): # Generate preprocessing information all_info["preprocessing"] = pdf_object.get_preprocessing_factors() # .fitinfo-like info - all_info["stop_epoch"] = stopping_object.stop_epoch - all_info["best_epoch"] = stopping_object.e_best_chi2 + all_info["stop_epoch"] = epoch_stop + all_info["best_epoch"] = replica_status.best_epoch all_info["erf_tr"] = tr_chi2 - all_info["erf_vl"] = vl_chi2 + all_info["erf_vl"] = replica_status.best_vl all_info["chi2"] = true_chi2 - all_info["pos_state"] = stopping_object.positivity_status() + all_info["pos_state"] = replica_status.positivity_status all_info["arc_lengths"] = pdf_object.compute_arclength().tolist() all_info["integrability"] = pdf_object.integrability_numbers().tolist() all_info["timing"] = timing diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 6773cd6c1c..c881a13658 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -117,7 +117,7 @@ def parse_losses(history_object, data, suffix="loss"): total_points = 0 total_loss = 0 for exp_name, npoints in data.items(): - loss = np.take(hobj[exp_name + f"_{suffix}"], -1) + loss = np.array(hobj[exp_name + f"_{suffix}"]) dict_chi2[exp_name] = loss / npoints total_points += npoints total_loss += loss @@ -133,7 +133,7 @@ def parse_losses(history_object, data, suffix="loss"): class FitState: """ - Holds the state of the chi2 during the fit. + Holds the state of the chi2 during the fit for all replicas It holds the necessary information to reload the fit to a specific point in time if we are interested on reloading @@ -163,11 +163,6 @@ def __init__(self, all_tr_chi2, all_vl_chi2, info): self.weights = None self.best_epoch = 0 - def register_weigths(self, weights, best_epoch): - """ Save the current best weights and best_epoch of the fit """ - self.weights = weights - self.best_epoch = best_epoch - @property def vl_chi2(self): """ Returns the total validation chi2 """ @@ -178,10 +173,56 @@ def tr_chi2(self): """ Returns the total training chi2 """ return self.all_tr_chi2["total"] + def vl_chi2_for_replica(self, i): + """ Returns the validation_chi2 for a given replica """ + return self.all_vl_chi2["total"][i] + def __str__(self): return f"chi2: tr={self.tr_chi2} vl={self.vl_chi2}" +class ReplicaBest: + """ Extra complication which eventually will be merged with someone else + but it is here only for development.""" + + def __init__(self, pdf_model): + self._pdf_model = pdf_model + self._weights = None + self._best_epoch = None + self._best_vl_chi2 = INITIAL_CHI2 + + def positivity_pass(self): + """ By definition, if we have a ``best_epoch`` then positivity passed """ + if self._best_epoch is None: + return False + else: + return True + + @property + def best_epoch(self): + return self._best_epoch + + @property + def best_vl(self): + return float(self._best_vl_chi2) + + @property + def positivity_status(self): + if self.positivity_pass(): + return POS_OK + else: + return POS_BAD + + def register_best(self, chi2, epoch): + self._weights = self._pdf_model.get_weights() + self._best_epoch = epoch + self._best_vl_chi2 = chi2 + + def reload(self): + if self._weights: + self._pdf_model.set_weights(self._weights) + + class FitHistory: """ Keeps a list of FitState items holding the full history of the fit. @@ -194,20 +235,24 @@ class FitHistory: Parameters ---------- - pdf_model: n3fit.backends.MetaModel - PDF model being trained, used to saved the weights and compute - more + pdf_models: n3fit.backends.MetaModel + list of PDF models being trained, used to saved the weights + save_weights_each: int if given, it will save a snapshot of the fit every `save_weights_each` epochs """ + def __init__(self, pdf_models, save_weights_each=None): + # Save a list of status per replica + self._replicas = [] + for pdf_model in pdf_models: + self._replicas.append(ReplicaBest(pdf_model)) + # Save a list of status for the entire fit + self._history = [] - def __init__(self, pdf_model, save_weights_each=None): - self._pdf_model = pdf_model self._save_weights_each = save_weights_each # Initialize variables for the history self._weights = None self._best_epoch = None - self._history = [] self.final_epoch = None # Initialize variables for the snapshots self.reloadable_history = [] @@ -257,6 +302,20 @@ def best_tr(self): else: return self._history[self.final_epoch].tr_chi2 + def save_best_replica(self, i, epoch = None): + """ Save the state of replica ``i`` as a best fit so far. + If an epoch is given, save the best as the given epoch, otherwise + use the last one + """ + if epoch is None: + epoch = self.final_epoch + chi2 = self._history[epoch].vl_chi2[i] + self._replicas[i].register_best(chi2, epoch) + + def all_best_vl(self): + """ Returns the best validation chi2 for each replica """ + return [i.best_vl for i in self._replicas] + def register(self, fitstate, epoch): """ Save a new fitstate and updates the current final epoch Every `save_weights_each` (if set) saves a snapshot of the current best fit into @@ -264,8 +323,9 @@ def register(self, fitstate, epoch): Parameters ---------- - `fitstate` - a fitstate object to save + fitstate: FitState + FitState object + the fitstate of the object to save `epoch` the current epoch of the fit """ @@ -273,19 +333,22 @@ def register(self, fitstate, epoch): self._history.append(fitstate) if self._save_weights_each: save_here = (epoch + 1) % self._save_weights_each - if save_here == 0: - fitstate.register_weigths(self._weights, self.best_epoch) - self.reloadable_history.append(fitstate) + # TODO this must be done differently now +# if save_here == 0: +# fitstate.register_weigths(self._weights, self.best_epoch) +# self.reloadable_history.append(fitstate) def reload(self, weights=None): """ Reloads the best fit weights into the model if there are models to be reloaded A set of weights can be enforced as an optional argument """ + # TODO the weights part is tricky if weights is None: - weights = self._weights - if weights: - self._pdf_model.set_weights(weights) + for replica in self._replicas: + replica.reload() +# if weights: +# self._pdf_model.set_weights(weights) def rewind(self, step): """ Rewind the FitHistory object to the step `step` in the fit @@ -308,13 +371,13 @@ class Stopping: Parameters ---------- validation_model: n3fit.backends.MetaModel - the model with the validation mask applied - (and compiled with the validation data and covmat) + the model with the validation mask applied + (and compiled with the validation data and covmat) all_data_dict: dict - list containg all dictionaries containing all information about - the experiments/validation/regularizers/etc to be parsed by Stopping - pdf_model: n3fit.backends.MetaModel - pdf_model being trained + list containg all dictionaries containing all information about + the experiments/validation/regularizers/etc to be parsed by Stopping + pdf_models: list(n3fit.backends.MetaModel) + list of pdf_models being trained threshold_positivity: float maximum value allowed for the sum of all positivity losses total_epochs: int @@ -331,7 +394,7 @@ def __init__( self, validation_model, all_data_dicts, - pdf_model, + pdf_models, threshold_positivity=1e-6, total_epochs=0, stopping_patience=7000, @@ -348,9 +411,11 @@ def __init__( else: self.validation = Validation(validation_model, vl_ndata) self.positivity = Positivity(threshold_positivity, pos_sets) - self.history = FitHistory(pdf_model, save_weights_each=save_weights_each) + + self.history = FitHistory(pdf_models) # Initialize internal variables for the stopping + self.n_replicas = len(pdf_models) self.threshold_chi2 = threshold_chi2 self.dont_stop = dont_stop self.stop_now = False @@ -358,6 +423,7 @@ def __init__( self.stopping_degree = 0 self.count = 0 self.total_epochs = total_epochs + self.replica_iterator = None @property def vl_chi2(self): @@ -416,45 +482,67 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): Parameters ---------- - `training_info` - the output of a .fit() run - `epoch` - the index of the epoch + training_info: dict + output of a .fit() call, dictionary of the total loss (summed over replicas) for + each experiment + epoch: int + index of the epoch Returns ------- - `pass_ok` + pass_ok: bool true/false according to the status of the run """ # Step 1. Preprocess the event, count it towards the stopping degree # parse the training information and check whether it is a good point tr_chi2, all_tr = parse_losses(training_info, self._tr_ndata) - + if np.isnan(tr_chi2): log.warning(" > NaN found, stopping activated") self.stop_now = True # If we had a good model at any point, reload - self.history.reload() + self.history.reload() # TODO return False self.stopping_degree += self.count # Step 2. Check the validation loss at this point + # each loss is an array of the loss per replica vl_chi2, all_vl = self.validation.loss() # Step 3. Store information about the run and print stats if asked fitstate = FitState(all_tr, all_vl, self.validation.state) self.history.register(fitstate, epoch) + if print_stats: self.print_current_stats(epoch, fitstate) # Step 4. Check whether this is a better fit # this means improving vl_chi2 and passing positivity + + # Get the values that pass validation + passes_val = vl_chi2 < self.threshold_chi2 + passes_val &= vl_chi2 < self.history.all_best_vl() + # And the ones that pass positivity + passes_pos = self.positivity(fitstate) + + # Now loop over the valid indices to check whether the vl improved + # TODO: check whether this loop is hurting at all performance (shouldnt???) + for i in np.where(passes_val & passes_pos)[0]: + self.history.save_best_replica(i) + + # There is no stopping for now + return True + + # TODO for now we force all fits to get to the end + + + # For each of them, check whether the validation chi2 is better or not if self.positivity(fitstate) and vl_chi2 < self.threshold_chi2: if vl_chi2 < self.history.best_vl(): # Set the new best self.history.best_epoch = epoch - # Save stopping info + # Reset stopping info self.stopping_degree = 0 # Initialize the counter self.count = 1 @@ -517,6 +605,12 @@ def positivity_status(self): else: return POS_BAD + def get_next_replica(self): + """ Return the next ReplicaBest object""" + if self.replica_iterator is None: + self.replica_iterator = iter(self.history._replicas) + return next(self.replica_iterator) + def chi2exps_str(self, log_each=100): """ Returns a list of log-string with the status of the fit @@ -593,10 +687,10 @@ def _compute_validation_loss(self): Returns ------- - `total_loss` + total_loss: float total vale for the validation loss - `vl_dict` - dictionary containing a map of experiment names and loss + vl_dict: dict + dictionary containing a map of experiment names and their loss per replica """ loss_dict = self.model.compute_losses() self.state = loss_dict @@ -638,20 +732,20 @@ def check_positivity(self, history_object): If the positivity loss is above the threshold, the positivity fails otherwise, it passes. + It returns an array booleans which are True if positivity passed Parameters ---------- history_object: dict dictionary of entries in the form {'name': loss}, output of a MetaModel .fit() """ + positivity_pass = True for key in self.positivity_sets: key_loss = f"{key}_loss" - # If we are taking the avg when checking the output, we should do so here as well? - positivity_loss = np.take(history_object[key_loss], -1) - if positivity_loss > self.threshold: - return False - # If none of the positivities failed, it passes - return True + positivity_pass &= history_object[key_loss] < self.threshold + if not all(positivity_pass): + return np.array(positivity_pass) + return np.array(positivity_pass) def __call__(self, fitstate): """ From c4c6d26a43df9be79c2a532bff0babd68c16809a Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 18 Dec 2020 14:22:09 +0100 Subject: [PATCH 08/27] minimal working example --- n3fit/src/n3fit/ModelTrainer.py | 2 +- n3fit/src/n3fit/io/writer.py | 6 +++--- n3fit/src/n3fit/performfit.py | 2 +- n3fit/src/n3fit/stopping.py | 4 +++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index 60f8f52aac..73329ae617 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -762,7 +762,7 @@ def evaluate(self, stopping_object): experimental = self.model_dicts["experimental"] train_chi2 = stopping_object.evaluate_training(training["model"]) val_chi2, _ = stopping_object.validation.loss() - exp_chi2 = experimental["model"].compute_losses()["loss"] / experimental["ndata"] + exp_chi2 = np.array(experimental["model"].compute_losses()["loss"]) / experimental["ndata"] return train_chi2, val_chi2, exp_chi2 def hyperparametrizable(self, params): diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 43552c0caf..1f69f04878 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -70,11 +70,11 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): os.makedirs(replica_path_set, exist_ok=True) # Get the replica status for this object - replica_status = self.stopping_object.get_next_replica() + ii, replica_status = self.stopping_object.get_next_replica() stop_epoch = self.stopping_object.epoch_of_the_stop # TODO, this is wrong, will be dealt with later - tr_chi2 = tr_chi2.tolist()[0] - true_chi2 = true_chi2.tolist() + tr_chi2 = tr_chi2.tolist()[ii] + true_chi2 = true_chi2.tolist()[ii] vl_chi2 = vl_chi2.tolist()[0] # export PDF grid to file diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 607d23f163..3081b76b82 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -344,7 +344,7 @@ def performfit( # TODO: recompute training, valudation and experimental _per_ pdfmodel training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) writer_wrapper.write_data( - replica_path_set, output_path.name, training_chi2, val_chi2, true_chi2 + replica_path_set, output_path.name, training_chi2, val_chi2, exp_chi2 ) # Save the weights to some file for the given replica diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index c881a13658..35706f6c70 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -609,7 +609,9 @@ def get_next_replica(self): """ Return the next ReplicaBest object""" if self.replica_iterator is None: self.replica_iterator = iter(self.history._replicas) - return next(self.replica_iterator) + self.ii = -1 + self.ii += 1 + return self.ii, next(self.replica_iterator) def chi2exps_str(self, log_each=100): """ From 823915d36b9cd58b4362c4ee83f87ffae7902234 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 18 Dec 2020 15:05:43 +0100 Subject: [PATCH 09/27] allow only running in configurations that might work --- n3fit/src/n3fit/checks.py | 12 ++++++++++++ n3fit/src/n3fit/performfit.py | 1 + 2 files changed, 13 insertions(+) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index b96c2d0117..217f367d8a 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -352,3 +352,15 @@ def check_consistent_basis(fitting, theoryid): raise CheckError(f"{theoryid} (intrinsic charm) is incompatible with basis {fitbasis}") if not theoryid.get_description()["IC"] and has_c: raise CheckError(f"{theoryid} (perturbative charm) is incompatible with basis {fitbasis}") + + +@make_argcheck +def can_run_in_parallel(fitting, parallel_models=1): + """ Checks whether a runcard which is trying to run several replicas at once (parallel_models =/= 1) is valid + """ + if parallel_models == 1: + return + if fitting.get("genrep"): + raise CheckError("Replica generation is not supported yet for parallel models") + if fitting["parameters"].get("layer_type") != "dense": + raise CheckError("Parallelization has only been tested with layer_type=='dense'") diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 3081b76b82..69d9b1f819 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -75,6 +75,7 @@ def initialize_seeds(replica: list, trvlseed: int, nnseed: int, mcseed: int, gen # Action to be called by valid phys # All information defining the NN should come here in the "parameters" dict +@n3fit.checks.can_run_in_parallel @n3fit.checks.check_consistent_basis @n3fit.checks.wrapper_check_NN @n3fit.checks.wrapper_hyperopt From 765b735363872d8bd26e3a1db473301a19e71cc8 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 21 Dec 2020 17:09:31 +0100 Subject: [PATCH 10/27] dont let tensors get out of the model --- n3fit/src/n3fit/backends/keras_backend/MetaModel.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 7d36fa6386..d30a3c2aef 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -9,6 +9,7 @@ import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras import optimizers as Kopt +from tensorflow.python.keras.utils import tf_utils from n3fit.backends.keras_backend.operations import numpy_to_tensor # Check the TF version to check if legacy-mode is needed (TF < 2.2) @@ -198,7 +199,10 @@ def losses_fun(): self.compute_losses_function = losses_fun - return self.compute_losses_function() + ret = self.compute_losses_function() + # undocumented TF function that converts all the tensors from the ret dictionary to numpy arrays + # if it dissapears, equivalent to {k: i.numpy() for k, i in ret.items()} + return tf_utils.to_numpy_or_python_type(ret) def evaluate(self, x=None, y=None, **kwargs): From 9fd2bbb8db7ec1d70de0dfef250f27239df95d76 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 10:03:04 +0100 Subject: [PATCH 11/27] change stopping to accept the replica dimension --- n3fit/src/n3fit/ModelTrainer.py | 12 +- n3fit/src/n3fit/io/writer.py | 20 +- n3fit/src/n3fit/performfit.py | 22 +- n3fit/src/n3fit/stopping.py | 384 ++++++++++++++------------------ 4 files changed, 196 insertions(+), 242 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index 73329ae617..fb2f75f63d 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -759,10 +759,10 @@ def evaluate(self, stopping_object): # Needs to receive a `stopping_object` in order to select the part of the # training and the validation which are actually `chi2` and not part of the penalty training = self.model_dicts["training"] - experimental = self.model_dicts["experimental"] train_chi2 = stopping_object.evaluate_training(training["model"]) - val_chi2, _ = stopping_object.validation.loss() - exp_chi2 = np.array(experimental["model"].compute_losses()["loss"]) / experimental["ndata"] + val_chi2 = stopping_object.vl_chi2 + experimental = self.model_dicts["experimental"] + exp_chi2 = experimental["model"].compute_losses()["loss"] / experimental["ndata"] return train_chi2, val_chi2, exp_chi2 def hyperparametrizable(self, params): @@ -892,8 +892,7 @@ def hyperparametrizable(self, params): epochs=epochs, ) - # Save validation and training chi2 - training_loss = stopping_object.tr_chi2 + # Save validation chi2 validation_loss = stopping_object.vl_chi2 # Compute experimental loss @@ -918,13 +917,11 @@ def hyperparametrizable(self, params): break # Save all losses - l_train.append(training_loss) l_valid.append(validation_loss) l_exper.append(experimental_loss) dict_out = { "status": passed, - "training_loss": np.average(l_train), "validation_loss": np.average(l_valid), "experimental_loss": np.average(l_exper), } @@ -932,7 +929,6 @@ def hyperparametrizable(self, params): if self.mode_hyperopt: dict_out["loss"] = self.hyper_loss(l_hyper) dict_out["kfold_meta"] = { - "training_losses": l_train, "validation_losses": l_valid, "experimental_losses": l_exper, "hyper_losses": l_hyper, diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 1f69f04878..e852404dde 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -69,13 +69,10 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # Check the directory exist, if it doesn't, generate it os.makedirs(replica_path_set, exist_ok=True) - # Get the replica status for this object - ii, replica_status = self.stopping_object.get_next_replica() stop_epoch = self.stopping_object.epoch_of_the_stop - # TODO, this is wrong, will be dealt with later - tr_chi2 = tr_chi2.tolist()[ii] - true_chi2 = true_chi2.tolist()[ii] - vl_chi2 = vl_chi2.tolist()[0] + + # Get the replica status for this object + _, replica_status = self.stopping_object.get_next_replica() # export PDF grid to file storefit( @@ -96,14 +93,21 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): self.timings, ) - # TODO: compute the chi2s directly from the stopping object # export all metadata from the fit to a single yaml file output_file = f"{replica_path_set}/{fitname}.json" json_dict = jsonfit( replica_status, self.pdf_object, tr_chi2, true_chi2, stop_epoch, self.timings ) with open(output_file, "w") as fs: - json.dump(json_dict, fs, indent=2) + json.dump(json_dict, fs, indent=2, cls = SuperEncoder) + + +class SuperEncoder(json.JSONEncoder): + """ Custom json encoder to get around the fact that np.float32 =/= float """ + def default(self, o): + if isinstance(o, np.float32): + return float(o) + return super().default(o) def jsonfit(replica_status, pdf_object, tr_chi2, true_chi2, epoch_stop, timing): diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 69d9b1f819..efa53f542d 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -324,6 +324,7 @@ def performfit( ) final_time = stopwatch.stop() + all_training_chi2, all_val_chi2, all_exp_chi2 = the_model_trainer.evaluate(stopping_object) for i, pdf_model in enumerate(pdf_models): # Each model goes into its own replica folder @@ -341,9 +342,12 @@ def performfit( final_time, ) - # Now write the data down - # TODO: recompute training, valudation and experimental _per_ pdfmodel - training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) + # Get the right chi2s + training_chi2 = np.take(all_training_chi2, i) + val_chi2 = np.take(all_val_chi2, i) + exp_chi2 = np.take(all_exp_chi2, i) + + # And write the data down writer_wrapper.write_data( replica_path_set, output_path.name, training_chi2, val_chi2, exp_chi2 ) @@ -358,12 +362,12 @@ def performfit( # If the history of weights is active then loop over it # rewind the state back to every step and write down the results - for step in range(len(stopping_object.history.reloadable_history)): - stopping_object.history.rewind(step) - new_path = output_path / f"history_step_{step}/replica_{replica_number}" - # We need to recompute the experimental chi2 for this point - training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) - writer_wrapper.write_data(new_path, output_path.name, training_chi2, val_chi2, exp_chi2) +# for step in range(len(stopping_object.history.reloadable_history)): +# stopping_object.history.rewind(step) +# new_path = output_path / f"history_step_{step}/replica_{replica_number}" +# # We need to recompute the experimental chi2 for this point +# training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) +# writer_wrapper.write_data(new_path, output_path.name, training_chi2, val_chi2, exp_chi2) # So every time we want to capture output_path.name and addd a history_step_X # parallel to the nnfit folder diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 35706f6c70..4da39007ad 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -41,6 +41,7 @@ # Pass/veto keys POS_OK = "POS_PASS" POS_BAD = "POS_VETO" +THRESHOLD_POS = 1e-6 def parse_ndata(all_data): @@ -145,43 +146,97 @@ class FitState: Parameters ---------- - all_tr_chi2: dict - Chi2 for all training datasets computed before the update of the weights - all_vl_chi2: dict - Chi2 for all validation datasets computed after the update of the weights - info: dict - Full return state of the Validation model + training_info: dict + all losses for the training model + validation_info: dict + all losses for the validation model """ + vl_ndata = None + tr_ndata = None + vl_suffix = None + + def __init__(self, training_info, validation_info): + if self.vl_ndata is None or self.tr_ndata is None or self.vl_suffix is None: + raise ValueError("FitState cannot be instantiated until vl_ndata, tr_ndata and vl_suffix are filled") + self.training = training_info + self.validation = validation_info + self._parsed = False + self._vl_chi2 = None + self._tr_chi2 = None + self._vl_dict = None + self._tr_dict = None - def __init__(self, all_tr_chi2, all_vl_chi2, info): - self.all_tr_chi2 = all_tr_chi2 - self.all_vl_chi2 = all_vl_chi2 - self.info = info - # These two variables are only filled for specific points - # in order to save precious memory, and only when we are - # saving the fit history each X number of epoch - self.weights = None - self.best_epoch = 0 + @property + def vl_loss(self): + """Return the total validation loss as it comes from the info dictionaries""" + return self.validation.get("loss") @property - def vl_chi2(self): - """ Returns the total validation chi2 """ - return self.all_vl_chi2["total"] + def tr_loss(self): + """Return the total validation loss as it comes from the info dictionaries""" + return self.training.get("loss") + + def _parse_chi2(self): + """ + Parses the chi2 from the losses according to the `tr_ndata` and + `vl_ndata` dictionaries of {dataset: n_points} + """ + if self._parsed: + return + if self.training is not None: + self._tr_chi2, self._tr_dict = parse_losses(self.training, self.tr_ndata) + if self.validation is not None: + self._vl_chi2, self._vl_dict = parse_losses(self.validation, self.vl_ndata, suffix=self.vl_suffix) @property def tr_chi2(self): - """ Returns the total training chi2 """ - return self.all_tr_chi2["total"] + self._parse_chi2() + return self._tr_chi2 - def vl_chi2_for_replica(self, i): - """ Returns the validation_chi2 for a given replica """ - return self.all_vl_chi2["total"][i] + @property + def vl_chi2(self): + self._parse_chi2() + return self._vl_chi2 + + @property + def all_tr_chi2(self): + self._parse_chi2() + return self._tr_dict + + @property + def all_vl_chi2(self): + self._parse_chi2() + return self._vl_dict + + def all_tr_chi2_for_replica(self, r): + """" Return the tr chi2 per dataset for a given replica """ + return {k:np.take(i, r) for k,i in self.all_tr_chi2.items()} + + def all_vl_chi2_for_replica(self, r): + """" Return the vl chi2 per dataset for a given replica """ + return {k:np.take(i, r) for k,i in self.all_vl_chi2.items()} + + def total_partial_tr_chi2(self): + """ Return the tr chi2 summed over replicas per experiment""" + return {k:np.sum(i) for k,i in self.all_tr_chi2.items()} + + def total_partial_vl_chi2(self): + """ Return the vl chi2 summed over replicas per experiment""" + return {k:np.sum(i) for k,i in self.all_tr_chi2.items()} + + def total_tr_chi2(self): + """ Return the total tr chi2 summed over replicas """ + return np.sum(self.tr_chi2) + + def total_vl_chi2(self): + """ Return the total vl chi2 summed over replicas """ + return np.sum(self.vl_chi2) def __str__(self): return f"chi2: tr={self.tr_chi2} vl={self.vl_chi2}" -class ReplicaBest: +class ReplicaState: """ Extra complication which eventually will be merged with someone else but it is here only for development.""" @@ -241,32 +296,28 @@ class FitHistory: save_weights_each: int if given, it will save a snapshot of the fit every `save_weights_each` epochs """ - def __init__(self, pdf_models, save_weights_each=None): - # Save a list of status per replica + def __init__(self, pdf_models, tr_ndata, vl_ndata, save_weights_each=None): + # Create a ReplicaState object for all models + # which will hold the best chi2 and weights per replica self._replicas = [] for pdf_model in pdf_models: - self._replicas.append(ReplicaBest(pdf_model)) + self._replicas.append(ReplicaState(pdf_model)) + + if vl_ndata is None: + vl_ndata = tr_ndata + vl_suffix = "loss" + else: + vl_ndata = vl_ndata + vl_suffix = "val_loss" + # All instances of FitState should use these + FitState.tr_ndata = tr_ndata + FitState.vl_ndata = vl_ndata + FitState.vl_suffix = vl_suffix + # Save a list of status for the entire fit self._history = [] - - self._save_weights_each = save_weights_each - # Initialize variables for the history - self._weights = None - self._best_epoch = None + self.best_epoch = None self.final_epoch = None - # Initialize variables for the snapshots - self.reloadable_history = [] - - @property - def best_epoch(self): - """ Epoch of the best fit """ - return self._best_epoch - - @best_epoch.setter - def best_epoch(self, epoch): - """ Saves the current weight """ - self._weights = self._pdf_model.get_weights() - self._best_epoch = epoch def get_state(self, epoch): """ Get the FitState of the system for a given epoch """ @@ -278,29 +329,7 @@ def best_state(self): return None else: index = self.best_epoch - best_state = self._history[index] - return best_state - - def best_vl(self): - """ Returns the chi2 of the best fit - if there was no best fit returns `INITIAL_CHI2` - if there was a problem, returns `TERRIBLE_CHI2` """ - if not self._weights: - return TERRIBLE_CHI2 - best_state = self.best_state() - if best_state: - return best_state.vl_chi2 - else: - return INITIAL_CHI2 - - def best_tr(self): - """ Returns the training chi2 of the best fit - if there are no best fit, returns the last one """ - best_state = self.best_state() - if best_state: - return best_state.tr_chi2 - else: - return self._history[self.final_epoch].tr_chi2 + return self._history[index] def save_best_replica(self, i, epoch = None): """ Save the state of replica ``i`` as a best fit so far. @@ -309,14 +338,15 @@ def save_best_replica(self, i, epoch = None): """ if epoch is None: epoch = self.final_epoch - chi2 = self._history[epoch].vl_chi2[i] + chi2 = self._history[epoch].vl_loss[i] self._replicas[i].register_best(chi2, epoch) + return False - def all_best_vl(self): - """ Returns the best validation chi2 for each replica """ + def all_best_vl_loss(self): + """ Returns the best validation loss for each replica """ return [i.best_vl for i in self._replicas] - def register(self, fitstate, epoch): + def register(self, epoch, training_info, validation_info): """ Save a new fitstate and updates the current final epoch Every `save_weights_each` (if set) saves a snapshot of the current best fit into the fitstate @@ -329,14 +359,11 @@ def register(self, fitstate, epoch): `epoch` the current epoch of the fit """ + # Save all the information in a fitstate object + fitstate = FitState(training_info, validation_info) self.final_epoch = epoch self._history.append(fitstate) - if self._save_weights_each: - save_here = (epoch + 1) % self._save_weights_each - # TODO this must be done differently now -# if save_here == 0: -# fitstate.register_weigths(self._weights, self.best_epoch) -# self.reloadable_history.append(fitstate) + return fitstate def reload(self, weights=None): """ Reloads the best fit weights into the model @@ -347,8 +374,6 @@ def reload(self, weights=None): if weights is None: for replica in self._replicas: replica.reload() -# if weights: -# self._pdf_model.set_weights(weights) def rewind(self, step): """ Rewind the FitHistory object to the step `step` in the fit @@ -395,28 +420,27 @@ def __init__( validation_model, all_data_dicts, pdf_models, - threshold_positivity=1e-6, + threshold_positivity=THRESHOLD_POS, total_epochs=0, stopping_patience=7000, threshold_chi2=10.0, dont_stop=False, save_weights_each=None, ): - # Parse the training, validation and positivity sets from all the input dictionaries - self._tr_ndata, vl_ndata, pos_sets = parse_ndata(all_data_dicts) + # Save the validation object + self._validation = validation_model - # Create the Validation, Positivity and History objects - if vl_ndata is None: - self.validation = Validation(validation_model, self._tr_ndata, no_validation=True) - else: - self.validation = Validation(validation_model, vl_ndata) - self.positivity = Positivity(threshold_positivity, pos_sets) + # Create the History object + tr_ndata, vl_ndata, pos_sets = parse_ndata(all_data_dicts) + self.history = FitHistory(pdf_models, tr_ndata, vl_ndata) - self.history = FitHistory(pdf_models) + # And the positivity checker + self._positivity = Positivity(threshold_positivity, pos_sets) # Initialize internal variables for the stopping self.n_replicas = len(pdf_models) self.threshold_chi2 = threshold_chi2 + self.dont_stop = dont_stop self.stop_now = False self.stopping_patience = stopping_patience @@ -427,13 +451,10 @@ def __init__( @property def vl_chi2(self): - """ Validation chi2 """ - return self.history.best_vl() - - @property - def tr_chi2(self): - """ Training chi2 """ - return self.history.best_tr() + """ Current validation chi2 """ + validation_info = self._validation.compute_losses() + fitstate = FitState(None, validation_info) + return fitstate.vl_chi2 @property def e_best_chi2(self): @@ -465,8 +486,8 @@ def evaluate_training(self, training_model): chi2 of the given ``training_model`` """ training_info = training_model.compute_losses() - tr_chi2, _ = parse_losses(training_info, self._tr_ndata) - return tr_chi2 + fitstate = FitState(training_info, None) + return fitstate.tr_chi2 def monitor_chi2(self, training_info, epoch, print_stats=False): """ @@ -483,7 +504,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): Parameters ---------- training_info: dict - output of a .fit() call, dictionary of the total loss (summed over replicas) for + output of a .fit() call, dictionary of the total loss (summed over replicas) for each experiment epoch: int index of the epoch @@ -493,62 +514,44 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): pass_ok: bool true/false according to the status of the run """ - # Step 1. Preprocess the event, count it towards the stopping degree - # parse the training information and check whether it is a good point - tr_chi2, all_tr = parse_losses(training_info, self._tr_ndata) - - if np.isnan(tr_chi2): + # Step 1. Check whether the fit has NaN'd and stop it if so + if np.isnan(training_info["loss"]): log.warning(" > NaN found, stopping activated") self.stop_now = True # If we had a good model at any point, reload self.history.reload() # TODO return False - self.stopping_degree += self.count - - # Step 2. Check the validation loss at this point - # each loss is an array of the loss per replica - vl_chi2, all_vl = self.validation.loss() + # Step 2. Compute the validation metrics + validation_info = self._validation.compute_losses() - # Step 3. Store information about the run and print stats if asked - fitstate = FitState(all_tr, all_vl, self.validation.state) - self.history.register(fitstate, epoch) - - if print_stats: - self.print_current_stats(epoch, fitstate) + # Step 3. Register the current point in (the) history + fitstate = self.history.register(epoch, training_info, validation_info) # Step 4. Check whether this is a better fit # this means improving vl_chi2 and passing positivity - - # Get the values that pass validation - passes_val = vl_chi2 < self.threshold_chi2 - passes_val &= vl_chi2 < self.history.all_best_vl() + passes_val = fitstate.vl_loss < self.threshold_chi2 + passes_val &= fitstate.vl_loss < self.history.all_best_vl_loss() # And the ones that pass positivity - passes_pos = self.positivity(fitstate) + passes_pos = self._positivity(fitstate) + passes = passes_val & passes_pos - # Now loop over the valid indices to check whether the vl improved - # TODO: check whether this loop is hurting at all performance (shouldnt???) - for i in np.where(passes_val & passes_pos)[0]: - self.history.save_best_replica(i) + self.stopping_degree += self.count - # There is no stopping for now - return True - - # TODO for now we force all fits to get to the end + # Step 5. loop over the valid indices to check whether the vl improved + stop_here = all(passes) + for i in np.where(passes)[0]: + stop_here &= self.history.save_best_replica(i) + self.stopping_degree = 0 + self.count = 1 + if self.stopping_degree > self.stopping_patience: + stop_here = True - # For each of them, check whether the validation chi2 is better or not - if self.positivity(fitstate) and vl_chi2 < self.threshold_chi2: - if vl_chi2 < self.history.best_vl(): - # Set the new best - self.history.best_epoch = epoch - # Reset stopping info - self.stopping_degree = 0 - # Initialize the counter - self.count = 1 + if print_stats: + self.print_current_stats(epoch, fitstate) - # If your patience has ended, prepare for stop - if self.stopping_degree > self.stopping_patience: + if stop_here: self.make_stop() return True @@ -561,20 +564,21 @@ def make_stop(self): def print_current_stats(self, epoch, fitstate): """ - Prints the last validation and training loss saved + Prints ``fitstate`` training and validation chi2s """ epoch_index = epoch + 1 - tr_loss = fitstate.tr_chi2 - vl_loss = fitstate.vl_chi2 - total_str = f"At epoch {epoch_index}/{self.total_epochs}, total loss: {tr_loss}\n" - - partials = [] - for experiment in self._tr_ndata: - chi2 = fitstate.all_tr_chi2[experiment] - partials.append(f"{experiment}: {chi2:.3f}") - total_str += ", ".join(partials) - - total_str += f"\nValidation loss at this point: {vl_loss}" + tr_chi2 = fitstate.total_tr_chi2() + vl_chi2 = fitstate.total_vl_chi2() + total_str = f"At epoch {epoch_index}/{self.total_epochs}, total loss: {tr_chi2}\n" + + # The partial chi2 makes no sense for more than one replica at once: + if self.n_replicas == 1: + partial_tr_chi2 = fitstate.total_partial_tr_chi2() + partials = [] + for experiment, chi2 in partial_tr_chi2.items(): + partials.append(f"{experiment}: {chi2:.3f}") + total_str += ", ".join(partials) + "\n" + total_str += f"Validation loss at this point: {vl_chi2}" log.info(total_str) def stop_here(self): @@ -606,106 +610,52 @@ def positivity_status(self): return POS_BAD def get_next_replica(self): - """ Return the next ReplicaBest object""" + """ Return the next ReplicaState object""" if self.replica_iterator is None: self.replica_iterator = iter(self.history._replicas) self.ii = -1 self.ii += 1 return self.ii, next(self.replica_iterator) - def chi2exps_str(self, log_each=100): + def chi2exps_str(self, replica=0, log_each=100): """ Returns a list of log-string with the status of the fit every `log_each` epochs Parameters ---------- - `log_each` + replica: int + which replica are we writing the log for + log_each: int every how many epochs to print the log Returns ------- - `file_list` - a list of string to be printed as `chi2exps.log` + file_list: list(str) + a list of strings to be printed as `chi2exps.log` """ final_epoch = self.history.final_epoch file_list = [] for i in range(log_each - 1, final_epoch + 1, log_each): fitstate = self.history.get_state(i) - all_tr = fitstate.all_tr_chi2 - all_vl = fitstate.all_vl_chi2 + all_tr = fitstate.all_tr_chi2_for_replica(replica) + all_vl = fitstate.all_vl_chi2_for_replica(replica) # Here it is assumed the validation exp set is always a subset of the training exp set data_list = [] - for exp in self._tr_ndata: - tr_loss = all_tr[exp] + for exp, tr_loss in all_tr.items(): vl_loss = all_vl.get(exp, 0.0) data_str = f"{exp}: {tr_loss} {vl_loss}" data_list.append(data_str) data = "\n".join(data_list) epoch_index = i + 1 - total_tr_loss = fitstate.tr_chi2 - total_vl_loss = fitstate.vl_chi2 strout = f""" Epoch: {epoch_index} {data} -Total: training = {total_tr_loss} validation = {total_vl_loss} """ file_list.append(strout) return file_list -class Validation: - """ - Controls the NNPDF cross-validation algorithm - - The cross-validation refers to the validation loss of the points of the dataset - not used in the fitting. - In general for any points considered here there will accompanying points from the - same dataset being included in the fitting. - - Parameters - ---------- - model: n3fit.backends.MetaModel - the model with the validation mask applied - (and compiled with the validation data and covmat) - """ - - def __init__(self, model, ndata_dict, verbose=False, no_validation=False): - self.model = model - self.state = None - self.verbose = verbose - self.ndata_dict = ndata_dict - self.n_val_exp = len(ndata_dict) - if no_validation: - self.suffix = "loss" - else: - self.suffix = "val_loss" - - def _compute_validation_loss(self): - """ - Evaluates the validation model and returns a tuple (`total_loss`, `vl_dict`) - with the information for the validation loss by experimenet normalized to the - number of points of each experiment - - Returns - ------- - total_loss: float - total vale for the validation loss - vl_dict: dict - dictionary containing a map of experiment names and their loss per replica - """ - loss_dict = self.model.compute_losses() - self.state = loss_dict - return parse_losses(loss_dict, self.ndata_dict, suffix=self.suffix) - - def loss(self): - """ - Returns a tuple with the validation loss and a - dictionary for the validation loss per experiment - """ - return self._compute_validation_loss() - - class Positivity: """ Controls the positivity requirements. @@ -754,4 +704,4 @@ def __call__(self, fitstate): Checks whether a given FitState object passes the positivity requirement """ - return self.check_positivity(fitstate.info) + return self.check_positivity(fitstate.validation) From 867cb934f7642f4da09c8b1ba9d18ab8dc1597fb Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 15:31:52 +0100 Subject: [PATCH 12/27] several more fixes --- n3fit/src/n3fit/ModelTrainer.py | 7 ++-- n3fit/src/n3fit/checks.py | 10 ++++- n3fit/src/n3fit/performfit.py | 3 -- n3fit/src/n3fit/stopping.py | 65 +++++++++------------------------ 4 files changed, 28 insertions(+), 57 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index fb2f75f63d..bcfb9f4bea 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -710,10 +710,9 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100): callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) - if stopping_object.positivity_pass(): - return self.pass_status - else: - return self.failed_status + # TODO: this needs to be changed for hyperopt + return self.pass_status +# return self.failed_status def _hyperopt_override(self, params): """ Unrolls complicated hyperopt structures into very simple dictionaries""" diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 217f367d8a..31fdae7929 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -355,12 +355,18 @@ def check_consistent_basis(fitting, theoryid): @make_argcheck -def can_run_in_parallel(fitting, parallel_models=1): +def can_run_in_parallel(fitting, replica, parallel_models=1): """ Checks whether a runcard which is trying to run several replicas at once (parallel_models =/= 1) is valid """ + rp = len(replica) + genrep = fitting.get("genrep") + if rp > 1 and not genrep: + raise CheckError("Can't run more than one replica at once if no replicas are to be generated") if parallel_models == 1: return - if fitting.get("genrep"): + if genrep: raise CheckError("Replica generation is not supported yet for parallel models") if fitting["parameters"].get("layer_type") != "dense": raise CheckError("Parallelization has only been tested with layer_type=='dense'") + if rp > 1: + raise CheckError("Parallel mode cannot be used together with multireplica runs") diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index efa53f542d..aec1bd2fb0 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -241,7 +241,6 @@ def performfit( fitting["fitbasis"], nnseed, debug=debug, - save_weights_each=fitting.get("save_weights_each"), kfold_parameters=kfold_parameters, max_cores=maxcores, model_file=fitting.get("load"), @@ -313,13 +312,11 @@ def performfit( """ > > The stopping point has been at: {0} with a loss of {1} which it got at {2}. Stopping degree {3} - Positivity state: {4} """.format( stopping_object.stop_epoch, stopping_object.vl_chi2, stopping_object.e_best_chi2, stopping_object.stopping_degree, - stopping_object.positivity_status(), ) ) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 4da39007ad..aa4519c3ea 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -307,7 +307,6 @@ def __init__(self, pdf_models, tr_ndata, vl_ndata, save_weights_each=None): vl_ndata = tr_ndata vl_suffix = "loss" else: - vl_ndata = vl_ndata vl_suffix = "val_loss" # All instances of FitState should use these FitState.tr_ndata = tr_ndata @@ -338,13 +337,14 @@ def save_best_replica(self, i, epoch = None): """ if epoch is None: epoch = self.final_epoch - chi2 = self._history[epoch].vl_loss[i] - self._replicas[i].register_best(chi2, epoch) + self.best_epoch = epoch # TODO: makes sense for only one model in parallel + loss = self._history[epoch].vl_loss[i] + self._replicas[i].register_best(loss, epoch) return False def all_best_vl_loss(self): """ Returns the best validation loss for each replica """ - return [i.best_vl for i in self._replicas] + return np.array([i.best_vl for i in self._replicas]) def register(self, epoch, training_info, validation_info): """ Save a new fitstate and updates the current final epoch @@ -354,9 +354,9 @@ def register(self, epoch, training_info, validation_info): Parameters ---------- fitstate: FitState - FitState object + FitState object the fitstate of the object to save - `epoch` + epoch: int the current epoch of the fit """ # Save all the information in a fitstate object @@ -365,25 +365,13 @@ def register(self, epoch, training_info, validation_info): self._history.append(fitstate) return fitstate - def reload(self, weights=None): + def reload(self): """ Reloads the best fit weights into the model if there are models to be reloaded A set of weights can be enforced as an optional argument """ - # TODO the weights part is tricky - if weights is None: - for replica in self._replicas: - replica.reload() - - def rewind(self, step): - """ Rewind the FitHistory object to the step `step` in the fit - """ - fitstate = self.reloadable_history[step] - historic_weights = fitstate.weights - self.reload(weights=historic_weights) - self.best_epoch = fitstate.best_epoch - self.final_epoch = (step + 1) * self._save_weights_each - 1 - # -1 because we are saving the epochs starting at 0 + for replica in self._replicas: + replica.reload() class Stopping: @@ -527,10 +515,15 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # Step 3. Register the current point in (the) history fitstate = self.history.register(epoch, training_info, validation_info) + if print_stats: + self.print_current_stats(epoch, fitstate) # Step 4. Check whether this is a better fit # this means improving vl_chi2 and passing positivity - passes_val = fitstate.vl_loss < self.threshold_chi2 + passes_val = True + # Don't start counting until the chi2 of the validation goes below a certain threshold + if self.count == 0: + passes_val &= fitstate.vl_chi2 < self.threshold_chi2 passes_val &= fitstate.vl_loss < self.history.all_best_vl_loss() # And the ones that pass positivity passes_pos = self._positivity(fitstate) @@ -548,9 +541,6 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): if self.stopping_degree > self.stopping_patience: stop_here = True - if print_stats: - self.print_current_stats(epoch, fitstate) - if stop_here: self.make_stop() return True @@ -569,7 +559,7 @@ def print_current_stats(self, epoch, fitstate): epoch_index = epoch + 1 tr_chi2 = fitstate.total_tr_chi2() vl_chi2 = fitstate.total_vl_chi2() - total_str = f"At epoch {epoch_index}/{self.total_epochs}, total loss: {tr_chi2}\n" + total_str = f"At epoch {epoch_index}/{self.total_epochs}, total chi2: {tr_chi2}\n" # The partial chi2 makes no sense for more than one replica at once: if self.n_replicas == 1: @@ -578,7 +568,7 @@ def print_current_stats(self, epoch, fitstate): for experiment, chi2 in partial_tr_chi2.items(): partials.append(f"{experiment}: {chi2:.3f}") total_str += ", ".join(partials) + "\n" - total_str += f"Validation loss at this point: {vl_chi2}" + total_str += f"Validation chi2 at this point: {vl_chi2}" log.info(total_str) def stop_here(self): @@ -590,25 +580,6 @@ def stop_here(self): else: return self.stop_now - def positivity_pass(self): - """ Checks whether the positivity loss is below the requested threshold - If there is no best state, the positivity (obv) cannot pass - """ - best_state = self.history.best_state() - if best_state is not None and self.positivity(best_state): - return True - else: - return False - - def positivity_status(self): - """ Checks whether the positivity loss is below the requested threshold - If there is no best state, the positivity (obv) cannot pass - """ - if self.positivity_pass(): - return POS_OK - else: - return POS_BAD - def get_next_replica(self): """ Return the next ReplicaState object""" if self.replica_iterator is None: @@ -695,8 +666,6 @@ def check_positivity(self, history_object): for key in self.positivity_sets: key_loss = f"{key}_loss" positivity_pass &= history_object[key_loss] < self.threshold - if not all(positivity_pass): - return np.array(positivity_pass) return np.array(positivity_pass) def __call__(self, fitstate): From 579429bc375462245bf1ecff86c386bac830e9d9 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 15:55:17 +0100 Subject: [PATCH 13/27] many changes to stopping, remove deprecated options --- n3fit/src/n3fit/ModelTrainer.py | 5 - n3fit/src/n3fit/io/writer.py | 2 +- n3fit/src/n3fit/performfit.py | 12 -- n3fit/src/n3fit/stopping.py | 218 ++++++++++++++++---------------- 4 files changed, 108 insertions(+), 129 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index bcfb9f4bea..e307be3401 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -174,7 +174,6 @@ def __init__( pass_status="ok", failed_status="fail", debug=False, - save_weights_each=False, kfold_parameters=None, max_cores=None, model_file=None, @@ -192,8 +191,6 @@ def __init__( pass_status: flag to signal a good run failed_status: flag to signal a bad run debug: flag to activate some debug options - save_weights_each: if set, save the state of the fit - every ``save_weights_each`` epochs model_file: str whether to save the models sum_rules: str whether sum rules should be enabled (All, MSR, VSR, False) """ @@ -212,7 +209,6 @@ def __init__( self.pass_status = pass_status self.failed_status = failed_status self.debug = debug - self.save_weights_each = save_weights_each self.all_datasets = [] self.parallel_models = parallel_models @@ -877,7 +873,6 @@ def hyperparametrizable(self, params): pdf_models, total_epochs=epochs, stopping_patience=stopping_epochs, - save_weights_each=self.save_weights_each, threshold_positivity=threshold_pos, threshold_chi2=threshold_chi2 ) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index e852404dde..8c325b2910 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -72,7 +72,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): stop_epoch = self.stopping_object.epoch_of_the_stop # Get the replica status for this object - _, replica_status = self.stopping_object.get_next_replica() + replica_status = self.stopping_object.get_next_replica() # export PDF grid to file storefit( diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index aec1bd2fb0..4016690cff 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -357,17 +357,5 @@ def performfit( # Need to use "str" here because TF 2.2 has a bug for paths objects (fixed in 2.3 though) pdf_model.save_weights(str(model_file_path), save_format="h5") - # If the history of weights is active then loop over it - # rewind the state back to every step and write down the results -# for step in range(len(stopping_object.history.reloadable_history)): -# stopping_object.history.rewind(step) -# new_path = output_path / f"history_step_{step}/replica_{replica_number}" -# # We need to recompute the experimental chi2 for this point -# training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) -# writer_wrapper.write_data(new_path, output_path.name, training_chi2, val_chi2, exp_chi2) - - # So every time we want to capture output_path.name and addd a history_step_X - # parallel to the nnfit folder - if tboard is not None: log.info("Tensorboard logging information is stored at %s", log_path) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index aa4519c3ea..786e14ca17 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -134,30 +134,33 @@ def parse_losses(history_object, data, suffix="loss"): class FitState: """ - Holds the state of the chi2 during the fit for all replicas + Holds the state of the chi2 during the fit for all replicas - It holds the necessary information to reload the fit - to a specific point in time if we are interested on reloading - (otherwise the relevant variables stay empty to save memory) + It holds the necessary information to reload the fit + to a specific point in time if we are interested on reloading + (otherwise the relevant variables stay empty to save memory) - Note: the training chi2 is computed before the update of the weights - so it is the chi2 that informed the updated corresponding to this state. - The validation chi2 instead is computed after the update of the weights. + Note: the training chi2 is computed before the update of the weights + so it is the chi2 that informed the updated corresponding to this state. + The validation chi2 instead is computed after the update of the weights. - Parameters - ---------- - training_info: dict - all losses for the training model - validation_info: dict - all losses for the validation model + Parameters + ---------- + training_info: dict + all losses for the training model + validation_info: dict + all losses for the validation model """ + vl_ndata = None tr_ndata = None vl_suffix = None def __init__(self, training_info, validation_info): if self.vl_ndata is None or self.tr_ndata is None or self.vl_suffix is None: - raise ValueError("FitState cannot be instantiated until vl_ndata, tr_ndata and vl_suffix are filled") + raise ValueError( + "FitState cannot be instantiated until vl_ndata, tr_ndata and vl_suffix are filled" + ) self.training = training_info self.validation = validation_info self._parsed = False @@ -186,7 +189,9 @@ def _parse_chi2(self): if self.training is not None: self._tr_chi2, self._tr_dict = parse_losses(self.training, self.tr_ndata) if self.validation is not None: - self._vl_chi2, self._vl_dict = parse_losses(self.validation, self.vl_ndata, suffix=self.vl_suffix) + self._vl_chi2, self._vl_dict = parse_losses( + self.validation, self.vl_ndata, suffix=self.vl_suffix + ) @property def tr_chi2(self): @@ -210,19 +215,19 @@ def all_vl_chi2(self): def all_tr_chi2_for_replica(self, r): """" Return the tr chi2 per dataset for a given replica """ - return {k:np.take(i, r) for k,i in self.all_tr_chi2.items()} + return {k: np.take(i, r) for k, i in self.all_tr_chi2.items()} def all_vl_chi2_for_replica(self, r): """" Return the vl chi2 per dataset for a given replica """ - return {k:np.take(i, r) for k,i in self.all_vl_chi2.items()} + return {k: np.take(i, r) for k, i in self.all_vl_chi2.items()} def total_partial_tr_chi2(self): """ Return the tr chi2 summed over replicas per experiment""" - return {k:np.sum(i) for k,i in self.all_tr_chi2.items()} + return {k: np.sum(i) for k, i in self.all_tr_chi2.items()} def total_partial_vl_chi2(self): """ Return the vl chi2 summed over replicas per experiment""" - return {k:np.sum(i) for k,i in self.all_tr_chi2.items()} + return {k: np.sum(i) for k, i in self.all_tr_chi2.items()} def total_tr_chi2(self): """ Return the total tr chi2 summed over replicas """ @@ -237,7 +242,7 @@ def __str__(self): class ReplicaState: - """ Extra complication which eventually will be merged with someone else + """Extra complication which eventually will be merged with someone else but it is here only for development.""" def __init__(self, pdf_model): @@ -280,28 +285,27 @@ def reload(self): class FitHistory: """ - Keeps a list of FitState items holding the full history of the fit. + Keeps a list of FitState items holding the full history of the fit. - It also keeps track of the best epoch and the associated weights. + It also keeps track of the best epoch and the associated weights. - Can be iterated when there are snapshots of the fit being saved. - When iterated it will rewind the fit to each of the point in history - that have been saved. + Can be iterated when there are snapshots of the fit being saved. + When iterated it will rewind the fit to each of the point in history + that have been saved. - Parameters - ---------- - pdf_models: n3fit.backends.MetaModel - list of PDF models being trained, used to saved the weights - - save_weights_each: int - if given, it will save a snapshot of the fit every `save_weights_each` epochs + Parameters + ---------- + pdf_models: n3fit.backends.MetaModel + list of PDF models being trained, used to saved the weights """ - def __init__(self, pdf_models, tr_ndata, vl_ndata, save_weights_each=None): + + def __init__(self, pdf_models, tr_ndata, vl_ndata): # Create a ReplicaState object for all models # which will hold the best chi2 and weights per replica self._replicas = [] for pdf_model in pdf_models: self._replicas.append(ReplicaState(pdf_model)) + self._iter_replicas = iter(self._replicas) if vl_ndata is None: vl_ndata = tr_ndata @@ -320,36 +324,30 @@ def __init__(self, pdf_models, tr_ndata, vl_ndata, save_weights_each=None): def get_state(self, epoch): """ Get the FitState of the system for a given epoch """ - return self._history[epoch] - - def best_state(self): - """ Return the FitState object corresponding to the best fit """ - if self.best_epoch is None: - return None - else: - index = self.best_epoch - return self._history[index] - - def save_best_replica(self, i, epoch = None): - """ Save the state of replica ``i`` as a best fit so far. + try: + return self._history[epoch] + except IndexError as e: + raise ValueError( + f"Tried to get obtain the state for epoch {epoch} when only {len(self._history)} epochs have been saved" + ) from e + + def save_best_replica(self, i, epoch=None): + """Save the state of replica ``i`` as a best fit so far. If an epoch is given, save the best as the given epoch, otherwise use the last one """ if epoch is None: epoch = self.final_epoch - self.best_epoch = epoch # TODO: makes sense for only one model in parallel - loss = self._history[epoch].vl_loss[i] + self.best_epoch = epoch # TODO: makes sense for only one model in parallel + loss = self.get_state(epoch).vl_loss[i] self._replicas[i].register_best(loss, epoch) - return False def all_best_vl_loss(self): """ Returns the best validation loss for each replica """ return np.array([i.best_vl for i in self._replicas]) def register(self, epoch, training_info, validation_info): - """ Save a new fitstate and updates the current final epoch - Every `save_weights_each` (if set) saves a snapshot of the current best fit into - the fitstate + """Save a new fitstate and updates the current final epoch Parameters ---------- @@ -366,41 +364,42 @@ def register(self, epoch, training_info, validation_info): return fitstate def reload(self): - """ Reloads the best fit weights into the model + """Reloads the best fit weights into the model if there are models to be reloaded A set of weights can be enforced as an optional argument """ for replica in self._replicas: replica.reload() + def __next__(self): + return next(self._iter_replicas) + class Stopping: """ - Driver of the stopping algorithm + Driver of the stopping algorithm - Note, if the total number of points in the validation dictionary is None, it is assumed - the validation_model actually corresponds to the training model. + Note, if the total number of points in the validation dictionary is None, it is assumed + the validation_model actually corresponds to the training model. - Parameters - ---------- - validation_model: n3fit.backends.MetaModel - the model with the validation mask applied - (and compiled with the validation data and covmat) - all_data_dict: dict - list containg all dictionaries containing all information about - the experiments/validation/regularizers/etc to be parsed by Stopping - pdf_models: list(n3fit.backends.MetaModel) - list of pdf_models being trained - threshold_positivity: float - maximum value allowed for the sum of all positivity losses - total_epochs: int - total number of epochs - stopping_patience: int - how many epochs to wait for the validation loss to improve - dont_stop: bool - dont care about early stopping - save_weights_each: int - every how many epochs to save a snapshot of the fit + Parameters + ---------- + validation_model: n3fit.backends.MetaModel + the model with the validation mask applied + (and compiled with the validation data and covmat) + all_data_dict: dict + list containg all dictionaries containing all information about + the experiments/validation/regularizers/etc to be parsed by Stopping + pdf_models: list(n3fit.backends.MetaModel) + list of pdf_models being trained + threshold_positivity: float + maximum value allowed for the sum of all positivity losses + total_epochs: int + total number of epochs + stopping_patience: int + how many epochs to wait for the validation loss to improve + dont_stop: bool + dont care about early stopping """ def __init__( @@ -413,14 +412,13 @@ def __init__( stopping_patience=7000, threshold_chi2=10.0, dont_stop=False, - save_weights_each=None, ): # Save the validation object self._validation = validation_model # Create the History object tr_ndata, vl_ndata, pos_sets = parse_ndata(all_data_dicts) - self.history = FitHistory(pdf_models, tr_ndata, vl_ndata) + self._history = FitHistory(pdf_models, tr_ndata, vl_ndata) # And the positivity checker self._positivity = Positivity(threshold_positivity, pos_sets) @@ -435,7 +433,6 @@ def __init__( self.stopping_degree = 0 self.count = 0 self.total_epochs = total_epochs - self.replica_iterator = None @property def vl_chi2(self): @@ -446,6 +443,7 @@ def vl_chi2(self): @property def e_best_chi2(self): +<<<<<<< HEAD """ Epoch of the best chi2, if there is no best epoch return the last epoch""" be = self.history.best_epoch @@ -453,14 +451,18 @@ def e_best_chi2(self): return self.stop_epoch return be +======= + """ Epoch of the best chi2 """ + return self._history.best_epoch +>>>>>>> 5fb6df564 (many changes to stopping, remove deprecated options) @property def stop_epoch(self): """ Epoch in which the fit is stopped """ - return self.history.final_epoch + 1 + return self._history.final_epoch + 1 def evaluate_training(self, training_model): - """ Given the training model, evaluates the + """Given the training model, evaluates the model and parses the chi2 of the training datasets Parameters @@ -505,16 +507,14 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # Step 1. Check whether the fit has NaN'd and stop it if so if np.isnan(training_info["loss"]): log.warning(" > NaN found, stopping activated") - self.stop_now = True - # If we had a good model at any point, reload - self.history.reload() # TODO + self.make_stop() return False # Step 2. Compute the validation metrics validation_info = self._validation.compute_losses() # Step 3. Register the current point in (the) history - fitstate = self.history.register(epoch, training_info, validation_info) + fitstate = self._history.register(epoch, training_info, validation_info) if print_stats: self.print_current_stats(epoch, fitstate) @@ -524,7 +524,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # Don't start counting until the chi2 of the validation goes below a certain threshold if self.count == 0: passes_val &= fitstate.vl_chi2 < self.threshold_chi2 - passes_val &= fitstate.vl_loss < self.history.all_best_vl_loss() + passes_val &= fitstate.vl_loss < self._history.all_best_vl_loss() # And the ones that pass positivity passes_pos = self._positivity(fitstate) passes = passes_val & passes_pos @@ -532,12 +532,12 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): self.stopping_degree += self.count # Step 5. loop over the valid indices to check whether the vl improved - stop_here = all(passes) for i in np.where(passes)[0]: - stop_here &= self.history.save_best_replica(i) + self._history.save_best_replica(i) self.stopping_degree = 0 self.count = 1 + # By using the stopping degree we only stop when none of the replicas are improving anymore if self.stopping_degree > self.stopping_patience: stop_here = True @@ -546,15 +546,15 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): return True def make_stop(self): - """ Convenience method to set the stop_now flag + """Convenience method to set the stop_now flag and reload the history to the point of the best model if any """ self.stop_now = True - self.history.reload() + self._history.reload() def print_current_stats(self, epoch, fitstate): """ - Prints ``fitstate`` training and validation chi2s + Prints ``fitstate`` training and validation chi2s """ epoch_index = epoch + 1 tr_chi2 = fitstate.total_tr_chi2() @@ -572,7 +572,7 @@ def print_current_stats(self, epoch, fitstate): log.info(total_str) def stop_here(self): - """ Returns the stopping status + """Returns the stopping status If `dont_stop` is set returns always False (i.e., never stop) """ if self.dont_stop: @@ -582,11 +582,7 @@ def stop_here(self): def get_next_replica(self): """ Return the next ReplicaState object""" - if self.replica_iterator is None: - self.replica_iterator = iter(self.history._replicas) - self.ii = -1 - self.ii += 1 - return self.ii, next(self.replica_iterator) + return next(self._history) def chi2exps_str(self, replica=0, log_each=100): """ @@ -605,10 +601,10 @@ def chi2exps_str(self, replica=0, log_each=100): file_list: list(str) a list of strings to be printed as `chi2exps.log` """ - final_epoch = self.history.final_epoch + final_epoch = self._history.final_epoch file_list = [] for i in range(log_each - 1, final_epoch + 1, log_each): - fitstate = self.history.get_state(i) + fitstate = self._history.get_state(i) all_tr = fitstate.all_tr_chi2_for_replica(replica) all_vl = fitstate.all_vl_chi2_for_replica(replica) # Here it is assumed the validation exp set is always a subset of the training exp set @@ -629,19 +625,19 @@ def chi2exps_str(self, replica=0, log_each=100): class Positivity: """ - Controls the positivity requirements. + Controls the positivity requirements. - In order to check the positivity passes will check the history of the fitting - as the fitting included positivity sets. - If the sum of all positivity sets losses is above a certain value the model is - not accepted and the training continues. + In order to check the positivity passes will check the history of the fitting + as the fitting included positivity sets. + If the sum of all positivity sets losses is above a certain value the model is + not accepted and the training continues. - Parameters - ---------- - threshold_positivity: float - maximum value allowed for the sum of all positivity losses - positivity_sets: list - list of positivity datasets + Parameters + ---------- + threshold_positivity: float + maximum value allowed for the sum of all positivity losses + positivity_sets: list + list of positivity datasets """ def __init__(self, threshold, positivity_sets): @@ -670,7 +666,7 @@ def check_positivity(self, history_object): def __call__(self, fitstate): """ - Checks whether a given FitState object - passes the positivity requirement + Checks whether a given FitState object + passes the positivity requirement """ return self.check_positivity(fitstate.validation) From edbcb2d2780bdabd7cef1fa880df7ff5f1d5bad3 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 16:16:13 +0100 Subject: [PATCH 14/27] hotfix --- n3fit/src/n3fit/stopping.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 786e14ca17..d16b80adb6 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -363,6 +363,10 @@ def register(self, epoch, training_info, validation_info): self._history.append(fitstate) return fitstate + def stop_training_replica(self, i): + """ Stop training replica i """ + replica = self._replicas[i] + def reload(self): """Reloads the best fit weights into the model if there are models to be reloaded @@ -426,12 +430,12 @@ def __init__( # Initialize internal variables for the stopping self.n_replicas = len(pdf_models) self.threshold_chi2 = threshold_chi2 + self.stopping_degree = np.zeros(self.n_replicas) + self.count = np.zeros(self.n_replicas) self.dont_stop = dont_stop self.stop_now = False self.stopping_patience = stopping_patience - self.stopping_degree = 0 - self.count = 0 self.total_epochs = total_epochs @property @@ -520,28 +524,28 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # Step 4. Check whether this is a better fit # this means improving vl_chi2 and passing positivity - passes_val = True # Don't start counting until the chi2 of the validation goes below a certain threshold - if self.count == 0: - passes_val &= fitstate.vl_chi2 < self.threshold_chi2 - passes_val &= fitstate.vl_loss < self._history.all_best_vl_loss() + # once we start counting, don't bother anymore + passes = self.count or ( fitstate.vl_chi2 < self.threshold_chi2 ) + passes = passes and fitstate.vl_loss < self._history.all_best_vl_loss() # And the ones that pass positivity - passes_pos = self._positivity(fitstate) - passes = passes_val & passes_pos + passes = passes and self._positivity(fitstate) self.stopping_degree += self.count # Step 5. loop over the valid indices to check whether the vl improved for i in np.where(passes)[0]: self._history.save_best_replica(i) - self.stopping_degree = 0 - self.count = 1 + self.stopping_degree[i] = 0 + self.count[i] = 1 - # By using the stopping degree we only stop when none of the replicas are improving anymore - if self.stopping_degree > self.stopping_patience: - stop_here = True + stop_replicas = self.count and self.stopping_degree > self.stopping_patience + for i in np.where(stop_replicas)[0]: + self.count[i] = 0 + self._history.stop_training_replica(i) - if stop_here: + # By using the stopping degree we only stop when none of the replicas are improving anymore + if min(self.stopping_degree) > self.stopping_patience: self.make_stop() return True From 74935338518337d306bba8471c12deb9cf05f74d Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 16:33:31 +0100 Subject: [PATCH 15/27] stop at different points for each replica --- n3fit/src/n3fit/stopping.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index d16b80adb6..95dba489c3 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -274,14 +274,20 @@ def positivity_status(self): return POS_BAD def register_best(self, chi2, epoch): + """ Register a new best state and some metadata about it """ self._weights = self._pdf_model.get_weights() self._best_epoch = epoch self._best_vl_chi2 = chi2 def reload(self): + """ Reload the weights of the best state """ if self._weights: self._pdf_model.set_weights(self._weights) + def stop_training(self): + """ Stop training this replica """ + self._pdf_model.trainable = False + class FitHistory: """ @@ -319,9 +325,13 @@ def __init__(self, pdf_models, tr_ndata, vl_ndata): # Save a list of status for the entire fit self._history = [] - self.best_epoch = None self.final_epoch = None + @property + def best_epoch(self): + """ Return the best epoch per replica """ + return [i.best_epoch for i in self._replicas] + def get_state(self, epoch): """ Get the FitState of the system for a given epoch """ try: @@ -338,7 +348,6 @@ def save_best_replica(self, i, epoch=None): """ if epoch is None: epoch = self.final_epoch - self.best_epoch = epoch # TODO: makes sense for only one model in parallel loss = self.get_state(epoch).vl_loss[i] self._replicas[i].register_best(loss, epoch) @@ -365,7 +374,7 @@ def register(self, epoch, training_info, validation_info): def stop_training_replica(self, i): """ Stop training replica i """ - replica = self._replicas[i] + self._replicas[i].stop_training() def reload(self): """Reloads the best fit weights into the model @@ -430,8 +439,8 @@ def __init__( # Initialize internal variables for the stopping self.n_replicas = len(pdf_models) self.threshold_chi2 = threshold_chi2 - self.stopping_degree = np.zeros(self.n_replicas) - self.count = np.zeros(self.n_replicas) + self.stopping_degree = np.zeros(self.n_replicas, dtype=np.int) + self.count = np.zeros(self.n_replicas, dtype=np.int) self.dont_stop = dont_stop self.stop_now = False @@ -526,10 +535,10 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # this means improving vl_chi2 and passing positivity # Don't start counting until the chi2 of the validation goes below a certain threshold # once we start counting, don't bother anymore - passes = self.count or ( fitstate.vl_chi2 < self.threshold_chi2 ) - passes = passes and fitstate.vl_loss < self._history.all_best_vl_loss() + passes = self.count | ( fitstate.vl_chi2 < self.threshold_chi2 ) + passes &= fitstate.vl_loss < self._history.all_best_vl_loss() # And the ones that pass positivity - passes = passes and self._positivity(fitstate) + passes &= self._positivity(fitstate) self.stopping_degree += self.count @@ -539,7 +548,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): self.stopping_degree[i] = 0 self.count[i] = 1 - stop_replicas = self.count and self.stopping_degree > self.stopping_patience + stop_replicas = self.count & (self.stopping_degree > self.stopping_patience) for i in np.where(stop_replicas)[0]: self.count[i] = 0 self._history.stop_training_replica(i) From ab64c294e32af3bd237549230e104e54295be4e5 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 22 Dec 2020 17:21:39 +0100 Subject: [PATCH 16/27] 1-dataset runcards should also work --- n3fit/src/n3fit/backends/keras_backend/MetaModel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index d30a3c2aef..c7be3f4acc 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -193,6 +193,9 @@ def compute_losses(self): @tf.function def losses_fun(): predictions = self(self._parse_input(None)) + # If we only have one dataset the output changes + if len(out_names) == 2: + predictions = [predictions] total_loss = tf.reduce_sum(predictions, axis=0) ret = [total_loss] + predictions return dict(zip(out_names, ret)) From a875924d0c5dce0d855d84b773a3c9f65d036da3 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 4 Jan 2021 10:07:40 +0100 Subject: [PATCH 17/27] remove unnecesary middle layers --- .../n3fit/backends/keras_backend/MetaModel.py | 30 +++++------ n3fit/src/n3fit/layers/losses.py | 46 ++++++++++++---- n3fit/src/n3fit/model_gen.py | 53 ++++++++++++------- n3fit/src/n3fit/performfit.py | 2 + n3fit/src/n3fit/stopping.py | 34 +++++++----- n3fit/src/n3fit/tests/test_losses.py | 4 +- 6 files changed, 110 insertions(+), 59 deletions(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index c7be3f4acc..99f4a640db 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -207,18 +207,6 @@ def losses_fun(): # if it dissapears, equivalent to {k: i.numpy() for k, i in ret.items()} return tf_utils.to_numpy_or_python_type(ret) - - def evaluate(self, x=None, y=None, **kwargs): - """ - Wrapper around evaluate to take into account the case in which the data is already known - when the model is compiled. - """ - x = self._parse_input(self.x_in) - if LEGACY and y is None: - y = self.target_tensors - result = super().evaluate(x=x, y=y, **kwargs) - return result - def compile( self, optimizer_name="RMSprop", @@ -286,16 +274,28 @@ def compile( super(MetaModel, self).compile(optimizer=opt, loss=loss) def make_test_function(self): - """ If the model has been compiled with target data, it creates - a specific evaluate function with the target data already evaluated. - Otherwise return the normal tensorflow behaviour. """ + If the model has been compiled in the normal NNPDF way, + then the output of the prediction is already the loss, so we skip this part + by just summing over predictions. + + Otherwise, just return the usual TF make_test_function + """ + if self.eval_fun is not None: return self.eval_fun if self.target_tensors is None: return super().make_test_function() + @tf.function + def eval_fun(*args): + predictions = self(self._parse_input(None)) + return tf.reduce_sum(predictions) + + self.eval_fun = eval_fun + return eval_fun + # Recover the target tensors and their lengths, we cannot rely # directly on the output from the model as we might have target_tensors # with 0 data points (if the tr/vl mask covers the whole set) diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py index f56dc0f461..2d89c387c9 100644 --- a/n3fit/src/n3fit/layers/losses.py +++ b/n3fit/src/n3fit/layers/losses.py @@ -5,11 +5,13 @@ from n3fit.backends import MetaLayer from n3fit.backends import operations as op -class L_invcovmat(MetaLayer): + +class LossInvcovmat(MetaLayer): """ Loss function such that: L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} """ + def __init__(self, invcovmat, y_true, **kwargs): self.invcovmat = op.numpy_to_tensor(invcovmat) self.y_true = op.numpy_to_tensor(y_true) @@ -17,10 +19,32 @@ def __init__(self, invcovmat, y_true, **kwargs): def call(self, y_pred, **kwargs): tmp = self.y_true - y_pred - res = op.einsum('bri, ij, brj -> r', tmp, self.invcovmat, tmp) + res = op.einsum("bri, ij, brj -> r", tmp, self.invcovmat, tmp) return res -class L_positivity(MetaLayer): + +class LossLagrange(MetaLayer): + def __init__(self, c=1.0, **kwargs): + self._initial_multiplier = c + super().__init__(**kwargs) + + def build(self, input_shape): + multiplier = MetaLayer.init_constant(self._initial_multiplier) + self.kernel = self.builder_helper("lagMult", (1,), multiplier, trainable=False) + super().build(input_shape) + + def apply_multiplier(self, y): + return self.kernel * y + + def apply_loss(self, y): + return y + + def call(self, y_pred, **kwargs): + y = self.apply_multiplier(y_pred) + return self.apply_loss(y) + + +class LossPositivity(LossLagrange): """ Returns L = elu(y_pred) (considers y_true as 0) @@ -32,21 +56,23 @@ class L_positivity(MetaLayer): the lagrange multiplier is very big. In practice this function can produce results in the range (-alpha, inf) """ + def __init__(self, alpha=1e-7, **kwargs): self.alpha = alpha super().__init__(**kwargs) - def call(self, y_pred, **kwargs): - y = -y_pred - loss = op.backend_function("elu", y, alpha=self.alpha) + def apply_loss(self, y_pred): + loss = op.backend_function("elu", -y_pred, alpha=self.alpha) # Sum over the batch and the datapoints - return op.sum(loss, axis=[0,-1]) + return op.sum(loss, axis=[0, -1]) + -class L_integrability(MetaLayer): +class LossIntegrability(LossLagrange): """ Returns L = (y_pred)*(y_pred) """ - def call(self, y_pred, **kwargs): + + def apply_loss(self, y_pred): y = op.backend_function("square", y_pred) # Sum over the batch and the datapoints - return op.sum(y, axis=[0,-1]) + return op.sum(y, axis=[0, -1]) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 113c148b65..38468ac130 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -17,7 +17,9 @@ from n3fit.backends import base_layer_selector, regularizer_selector -def observable_generator(spec_dict, positivity_initial=1.0, integrability=False): # pylint: disable=too-many-locals +def observable_generator( + spec_dict, positivity_initial=1.0, integrability=False +): # pylint: disable=too-many-locals """ This function generates the observable model for each experiment. These are models which takes as input a PDF tensor (1 x size_of_xgrid x flavours) and outputs @@ -148,7 +150,9 @@ def gen_concat(): concat_vl = concat_ex # creating the experiment as a model turns out to bad for performance - def experiment_layer(pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=None, datasets_out=None): + def experiment_layer( + pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=None, datasets_out=None + ): """ By default works with the experiment observable """ output_layers = [] # First split the pdf layer into the different datasets if needed @@ -177,22 +181,19 @@ def experiment_layer(pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=Non # Now create the model for this experiment full_nx = sum(dataset_xsizes) - loss = lambda x,y: operations.sum(y) - if spec_dict["positivity"]: - out_mask = Mask( - c=positivity_initial, - axis=1, - ) + def loss(y_true, y_pred): + return operations.sum(y_pred) + if spec_dict["positivity"]: if integrability: - loss_pos = losses.L_integrability(name=spec_name) + loss_pos = losses.LossIntegrability(name=spec_name, c=positivity_initial) else: - loss_pos = losses.L_positivity(name=spec_name) + loss_pos = losses.LossPositivity(name=spec_name, c=positivity_initial) def out_positivity(pdf_layer, datasets_out=None): exp_result = experiment_layer(pdf_layer) - return loss_pos(out_mask(exp_result)) + return loss_pos(exp_result) layer_info = { "inputs": model_inputs, @@ -207,9 +208,9 @@ def out_positivity(pdf_layer, datasets_out=None): invcovmat = spec_dict["invcovmat_true"] # Prepare the loss function - loss_tr = losses.L_invcovmat(invcovmat_tr, spec_dict["expdata"], name=tr_name) - loss_vl = losses.L_invcovmat(invcovmat_vl, spec_dict["expdata_vl"], name=vl_name) - loss_ex = losses.L_invcovmat(invcovmat, spec_dict["expdata_true"], name=ex_name) + loss_tr = losses.LossInvcovmat(invcovmat_tr, spec_dict["expdata"], name=tr_name) + loss_vl = losses.LossInvcovmat(invcovmat_vl, spec_dict["expdata_vl"], name=vl_name) + loss_ex = losses.LossInvcovmat(invcovmat, spec_dict["expdata_true"], name=ex_name) # Generate the loss function and rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: @@ -223,13 +224,21 @@ def out_positivity(pdf_layer, datasets_out=None): def out_tr(pdf_layer, datasets_out=None): exp_result = experiment_layer( - pdf_layer, model_obs=model_obs_tr, concat=concat_tr, datasets_out=datasets_out, rotation=obsrot_tr + pdf_layer, + model_obs=model_obs_tr, + concat=concat_tr, + datasets_out=datasets_out, + rotation=obsrot_tr, ) return loss_tr(exp_result) def out_vl(pdf_layer, datasets_out=None): exp_result = experiment_layer( - pdf_layer, model_obs=model_obs_vl, concat=concat_vl, datasets_out=datasets_out, rotation=obsrot_vl + pdf_layer, + model_obs=model_obs_vl, + concat=concat_vl, + datasets_out=datasets_out, + rotation=obsrot_vl, ) return loss_vl(exp_result) @@ -497,7 +506,7 @@ def pdfNN_layer_generator( # Now we need a trainable network per model to be trained in parallel for i in range(parallel_models): - layer_seed = seed + i*number_of_layers + layer_seed = seed + i * number_of_layers if layer_type == "dense": reg = regularizer_selector(regularizer, **regularizer_args) list_of_pdf_layers = generate_dense_network( @@ -514,10 +523,14 @@ def pdfNN_layer_generator( # TODO: this information should come from the basis information # once the basis information is passed to this class list_of_pdf_layers = generate_dense_per_flavour_network( - inp, nodes, activations, initializer_name, seed=layer_seed, basis_size=last_layer_nodes, + inp, + nodes, + activations, + initializer_name, + seed=layer_seed, + basis_size=last_layer_nodes, ) - def dense_me(x): """Takes an input tensor `x` and applies all layers from the `list_of_pdf_layers` in order""" @@ -531,7 +544,7 @@ def dense_me(x): return curr_fun # Preprocessing layer (will be multiplied to the last of the denses) - preproseed = seed + number_of_layers*(i+1) + preproseed = seed + number_of_layers * (i + 1) layer_preproc = Preprocessing( input_shape=(1,), name=f"pdf_prepro_{i}", flav_info=flav_info, seed=preproseed ) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 4016690cff..ec59f9e385 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -312,11 +312,13 @@ def performfit( """ > > The stopping point has been at: {0} with a loss of {1} which it got at {2}. Stopping degree {3} + Positivity status: {4} """.format( stopping_object.stop_epoch, stopping_object.vl_chi2, stopping_object.e_best_chi2, stopping_object.stopping_degree, + stopping_object.positivity_status ) ) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 95dba489c3..a0e750e403 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -351,6 +351,10 @@ def save_best_replica(self, i, epoch=None): loss = self.get_state(epoch).vl_loss[i] self._replicas[i].register_best(loss, epoch) + def all_positivity_status(self): + """ Returns whether the positivity passed or not per replica """ + return np.array([i.positivity_status for i in self._replicas]) + def all_best_vl_loss(self): """ Returns the best validation loss for each replica """ return np.array([i.best_vl for i in self._replicas]) @@ -474,6 +478,12 @@ def stop_epoch(self): """ Epoch in which the fit is stopped """ return self._history.final_epoch + 1 + @property + def positivity_status(self): + """Returns POS_PASS if positivity passes or veto if it doesn't + for each replica""" + return self._history.all_positivity_status() + def evaluate_training(self, training_model): """Given the training model, evaluates the model and parses the chi2 of the training datasets @@ -535,7 +545,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): # this means improving vl_chi2 and passing positivity # Don't start counting until the chi2 of the validation goes below a certain threshold # once we start counting, don't bother anymore - passes = self.count | ( fitstate.vl_chi2 < self.threshold_chi2 ) + passes = self.count | (fitstate.vl_chi2 < self.threshold_chi2) passes &= fitstate.vl_loss < self._history.all_best_vl_loss() # And the ones that pass positivity passes &= self._positivity(fitstate) @@ -659,17 +669,17 @@ def __init__(self, threshold, positivity_sets): def check_positivity(self, history_object): """ - This function receives a history objects and loops over the - positivity_sets to check the value of the positivity loss. - - If the positivity loss is above the threshold, the positivity fails - otherwise, it passes. - It returns an array booleans which are True if positivity passed - - Parameters - ---------- - history_object: dict - dictionary of entries in the form {'name': loss}, output of a MetaModel .fit() + This function receives a history objects and loops over the + positivity_sets to check the value of the positivity loss. + + If the positivity loss is above the threshold, the positivity fails + otherwise, it passes. + It returns an array booleans which are True if positivity passed + story_object[key_loss] < self.threshold + Parameters + ---------- + history_object: dict + dictionary of entries in the form {'name': loss}, output of a MetaModel .fit() """ positivity_pass = True for key in self.positivity_sets: diff --git a/n3fit/src/n3fit/tests/test_losses.py b/n3fit/src/n3fit/tests/test_losses.py index 44a51e9fc3..784c9aeed7 100644 --- a/n3fit/src/n3fit/tests/test_losses.py +++ b/n3fit/src/n3fit/tests/test_losses.py @@ -13,7 +13,7 @@ # Tests loss functions def test_l_invcovmat(): - loss_f = losses.L_invcovmat(INVCOVMAT, ARR1) + loss_f = losses.LossInvcovmat(INVCOVMAT, ARR1) # Add a replica and batch dimension to T2 result = loss_f(np.expand_dims(ARR2, [0, 1])) y = ARR1 - ARR2 @@ -24,7 +24,7 @@ def test_l_invcovmat(): def test_l_positivity(): alpha = 1e-7 - loss_f = losses.L_positivity(alpha=alpha) + loss_f = losses.LossPositivity(alpha=alpha) result = loss_f(np.expand_dims(ARR2, [0, 1])) def elu_sum(yarr_in): From f7ba7e4e28dbf30e90e39dee190c4a12ba8b9f1d Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 5 Jan 2021 13:37:23 +0100 Subject: [PATCH 18/27] rebase to test with json --- n3fit/src/n3fit/io/writer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 8c325b2910..22f2ecd530 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -96,7 +96,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # export all metadata from the fit to a single yaml file output_file = f"{replica_path_set}/{fitname}.json" json_dict = jsonfit( - replica_status, self.pdf_object, tr_chi2, true_chi2, stop_epoch, self.timings + replica_status, self.pdf_object, tr_chi2, vl_chi2, true_chi2, stop_epoch, self.timings ) with open(output_file, "w") as fs: json.dump(json_dict, fs, indent=2, cls = SuperEncoder) @@ -110,7 +110,7 @@ def default(self, o): return super().default(o) -def jsonfit(replica_status, pdf_object, tr_chi2, true_chi2, epoch_stop, timing): +def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, epoch_stop, timing): """Generates a dictionary containing all relevant metadata for the fit Parameters @@ -136,7 +136,7 @@ def jsonfit(replica_status, pdf_object, tr_chi2, true_chi2, epoch_stop, timing): all_info["stop_epoch"] = epoch_stop all_info["best_epoch"] = replica_status.best_epoch all_info["erf_tr"] = tr_chi2 - all_info["erf_vl"] = replica_status.best_vl + all_info["erf_vl"] = vl_chi2 all_info["chi2"] = true_chi2 all_info["pos_state"] = replica_status.positivity_status all_info["arc_lengths"] = pdf_object.compute_arclength().tolist() From a559be12c772a976c38d43b3e426e0f7abf52d46 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 5 Jan 2021 19:35:48 +0100 Subject: [PATCH 19/27] changes to ensure everything (hyperopt, kfolding, diagonal covmat) works --- n3fit/runcards/Basic_runcard_parallel.yml | 106 ++++++ n3fit/runcards/DIS_diagonal_l2reg_example.yml | 2 +- n3fit/src/n3fit/ModelTrainer.py | 321 +++++++----------- .../n3fit/backends/keras_backend/MetaModel.py | 96 ++---- .../backends/keras_backend/operations.py | 4 +- n3fit/src/n3fit/checks.py | 4 +- n3fit/src/n3fit/io/writer.py | 6 +- n3fit/src/n3fit/layers/Rotations.py | 2 +- n3fit/src/n3fit/layers/losses.py | 71 +++- n3fit/src/n3fit/model_gen.py | 52 +-- n3fit/src/n3fit/msr.py | 17 +- n3fit/src/n3fit/performfit.py | 5 +- 12 files changed, 355 insertions(+), 331 deletions(-) create mode 100644 n3fit/runcards/Basic_runcard_parallel.yml diff --git a/n3fit/runcards/Basic_runcard_parallel.yml b/n3fit/runcards/Basic_runcard_parallel.yml new file mode 100644 index 0000000000..52fdcae4a3 --- /dev/null +++ b/n3fit/runcards/Basic_runcard_parallel.yml @@ -0,0 +1,106 @@ +# +# Configuration file for n3fit +# + +############################################################ +description: Basic runcard + +############################################################ +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +dataset_inputs: +- { dataset: SLACP, frac: 0.5} +- { dataset: NMC, frac: 0.5 } +- { dataset: NMCPD, frac: 0.5 } +- { dataset: CMSJETS11, frac: 0.5, sys: 10 } + +############################################################ +datacuts: + t0pdfset : NNPDF31_nlo_as_0118 # PDF set to generate t0 covmat + q2min : 3.49 # Q2 minimum + w2min : 12.5 # W2 minimum + combocuts : NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev : 0 # jet pt cut for tevatron + jetptcut_lhc : 0 # jet pt cut for lhc + wptcut_lhc : 30.0 # Minimum pT for W pT diff distributions + jetycut_tev : 1e30 # jet rap. cut for tevatron + jetycut_lhc : 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut : 1e30 # jet cfact. cut + +############################################################ +theory: + theoryid: 53 # database id + +############################################################ +fitting: + trvlseed: 1 + nnseed: 2 + mcseed: 3 + + genrep: False # true = generate MC replicas, false = use real data + + parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [15, 10, 8] + activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] + initializer: 'glorot_normal' + optimizer: + optimizer_name: 'RMSprop' + learning_rate: 0.01 + clipnorm: 1.0 + epochs: 900 + positivity: + multiplier: 1.05 # When any of the multiplier and/or the initial is not set + initial: # the poslambda will be used instead to compute these values per dataset + threshold: 1e-5 + stopping_patience: 0.30 # percentage of the number of epochs + layer_type: 'dense' + dropout: 0.0 + threshold_chi2: 5.0 + + # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) + # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) + # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) + # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) + fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. + basis: + # remeber to change the name of PDF accordingly with fitbasis + # pos: True for NN squared + # mutsize: mutation size + # mutprob: mutation probability + # smallx, largex: preprocessing ranges + - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False } + - { fl: g, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False } + - { fl: v, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False } + - { fl: v3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.21,0.57], largex: [1.35,3.08] } + - { fl: v8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True } + - { fl: t3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.37,1.52], largex: [1.74,3.39] } + - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] } + - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] } + +############################################################ +positivity: + posdatasets: + - { dataset: POSF2U, poslambda: 1e6 } # Positivity Lagrange Multiplier + - { dataset: POSFLL, poslambda: 1e4 } + +############################################################ +integrability: + integdatasets: + - {dataset: INTEGXT3, poslambda: 1e2} + +############################################################ +lhagrid: + nx : 150 + xmin: 1e-9 + xmed: 0.1 + xmax: 1.0 + nq : 50 + qmax: 1e5 + +############################################################ +debug: True +maxcores: 8 +parallel_models: 2 diff --git a/n3fit/runcards/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/DIS_diagonal_l2reg_example.yml index f392199f07..7b56a2fdc9 100644 --- a/n3fit/runcards/DIS_diagonal_l2reg_example.yml +++ b/n3fit/runcards/DIS_diagonal_l2reg_example.yml @@ -76,7 +76,7 @@ fitting: optimizer: learning_rate: 1.0 optimizer_name: 'Adadelta' - epochs: 40000 + epochs: 4000 positivity: multiplier: 1.09 initial: 10.0 diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index e307be3401..755011bbfb 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -9,6 +9,7 @@ between iterations while at the same time keeping the amount of redundant calls to a minimum """ import logging +from itertools import zip_longest import numpy as np import n3fit.model_gen as model_gen from n3fit.backends import MetaModel, clear_backend_state, operations, callbacks @@ -29,101 +30,12 @@ PUSH_INTEGRABILITY_EACH = 100 -def _assign_data_to_model(model, data_dict, fold_k=0): - """ - Reads the data dictionary (``data_dict``) and assings the target data to the model - It returns a dictionary containing: - { - 'model': the backend.MetaModel to be trained - 'target_ndata': an array of target output - 'ndata': the number of datapoints - 'losses': the list of loss functions of the model - } - - If kfolding is active applies the (``fold_k``) fold to the target data. - in this case ndata is a count of the non_zero entries of the fold - - Note: this function is transitional. Eventually a validphys action should - provide an experiment object with a .target_data(fold_indx) method which - should return the necessary information: - - number of datapoints - - target data with the right entries set to 0* - *or masked away if it is able to also return a list of loss functions that will mask away - the corresponding entries of the prediction - - - Parameters - ---------- - model: backend.MetaModel - model to be added to the dictionary - data_dict: dict - dictionary containing: { - 'expdata' : list of experimental data which the model will target, - 'folds' : a list (size=expdata) of lists (size=kfolds) with the folding masks) - 'losses': a list of loss functions for the model - } - fold_k: int - when kfolding, index of the fold, so that for every experiment we apply the - folds[index_experiment][mask] - - Returns - ------- - ret: dict - dictionary containing the model and its associated ndata and loss - """ - # Array with all data - all_data = data_dict["expdata"] - # Each element of this list correspond to the set of folds for one experiment - all_folds = data_dict["folds"] - n_exps = len(all_folds) - # Now set to 0 the data folded away - active_data = [] - ndata = 0 - for exp_data, exp_fold in zip(all_data, all_folds): - if exp_fold: - mask = exp_fold[fold_k] - active_data.append(exp_data * mask) - ndata += np.count_nonzero(mask) - else: - active_data.append(exp_data) - ndata += exp_data.size - # There might be special outputs (like positivitiy) that is not - # affected by the folding. - # They don't count for the chi2 (which is only for reporting) - active_data += all_data[n_exps:] - ret = { - "model": model, - "target_data": active_data, - "ndata": ndata, - "losses": data_dict["losses"], - } - return ret - - -def _model_compilation(models, optimizer_params): - """ - Compiles all models - - Parameters - ---------- - models: list(dict) - A ditionary defining the model - optimizer_params: dict - Optimizer parameters to be passes to the compile method of the model - """ - for _, model_dict in models.items(): - model = model_dict["model"] - target = model_dict["target_data"] - losses = model_dict["losses"] - model.compile(loss=losses, target_output=target, **optimizer_params) - - -def _pdf_injection(pdf_layers, observables, datasets_out=None): +def _pdf_injection(pdf_layers, observables, masks): """ Takes as input a list of output layers and returns a corresponding list where all output layers call the pdf layer at self.pdf_layer """ - return [f(x, datasets_out=datasets_out) for f, x in zip(observables, pdf_layers)] + return [f(x, mask=m) for f, x, m in zip_longest(observables, pdf_layers, masks)] def _LM_initial_and_multiplier(input_initial, input_multiplier, max_lambda, steps): @@ -183,16 +95,34 @@ def __init__( """ Parameters ---------- - exp_info: list of dictionaries containing experiments - pos_info: list of dictionaries containing positivity sets - integ_info: list of dictionaries containing integrability sets - flavinfo: the object returned by fitting['basis'] - nnseed: the seed used to initialise the Neural Network, will be passed to model_gen - pass_status: flag to signal a good run - failed_status: flag to signal a bad run - debug: flag to activate some debug options - model_file: str whether to save the models - sum_rules: str whether sum rules should be enabled (All, MSR, VSR, False) + exp_info: list + list of dictionaries containing experiments + pos_info: list + list of dictionaries containing positivity sets + integ_info: list + list of dictionaries containing integrability sets + flavinfo: list + the object returned by fitting['basis'] + fitbasis: str + the name of the basis being fitted + nnseed: int + the seed used to initialise the Neural Network, will be passed to model_gen + pass_status: str + flag to signal a good run + failed_status: str + flag to signal a bad run + debug: bool + flag to activate some debug options + kfold_parameters: dict + parameters defining the kfolding method + max_cores: int + maximum number of cores the fitting can use to run + model_file: str + whether to save the models + sum_rules: str + whether sum rules should be enabled (All, MSR, VSR, False) + parallel_models: int + number of models to fit in parallel """ # Save all input information @@ -249,7 +179,6 @@ def __init__( self.training = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "posdatasets": [], @@ -263,7 +192,6 @@ def __init__( self.validation = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "folds": [], @@ -272,13 +200,10 @@ def __init__( self.experimental = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "folds": [], } - self.model_dicts = None - self._fill_the_dictionaries() if self.validation["ndata"] == 0: @@ -357,33 +282,37 @@ def _fill_the_dictionaries(self): self.training["expdata"].append(integ_dict["expdata"]) self.training["integdatasets"].append(integ_dict["name"]) - def _model_generation(self, pdf_models, partition): + def _model_generation(self, pdf_models, partition, partition_idx): """ Fills the three dictionaries (``training``, ``validation``, ``experimental``) - with the ``model`` entry - + with the ``model`` entry. Compiles the validation and experimental models with fakes optimizers and learning rate as they are never trained, but this is needed by some backends - in order to run evaluate on them + in order to run evaluate on them. Before entering this function the dictionaries contain a list of inputs and a list of outputs, but they are not connected. This function connects inputs with outputs by injecting the PDF. At this point we have a PDF model that takes an input (1, None, 1) - and outputs in return (1, none, 14) + and outputs in return (1, none, 14). The injection of the PDF is done by concatenating all inputs and calling pdf_model on it. This in turn generates an output_layer that needs to be splitted for every experiment as we have a set of observable "functions" that each take (1, exp_xgrid_size, 14) and output (1, masked_ndata) where masked_ndata can be the training/validation - or the experimental mask (in which cased masked_ndata == ndata) + or the experimental mask (in which cased masked_ndata == ndata). + + Several models can be fitted at once by passing a list of models with a shared input + this function will give the same input to every model and will concatenate the output at the end + so that the final output of the model is (1, None, 14, n) (with n=number of parallel models) + Parameters ---------- - pdf_model: MetaModel - model producing pdf values + pdf_models: list(n3fit.backend.MetaModel) + a list of models that produce PDF values Returns ------- @@ -424,21 +353,28 @@ def _model_generation(self, pdf_models, partition): # If we are in a kfolding partition, select which datasets are out if partition: - kfold_datasets = partition["datasets"] - negate_k_datasets = [d for d in self.all_datasets if d not in kfold_datasets] + training_mask = [i[partition_idx] for i in self.training["folds"]] + validation_mask = [i[partition_idx] for i in self.validation["folds"]] + experimental_mask = [i[partition_idx] for i in self.experimental["folds"]] else: - kfold_datasets = None - negate_k_datasets = None + training_mask = validation_mask = experimental_mask = [None] # Training and validation leave out the kofld dataset # experiment leaves out the negation - output_tr = _pdf_injection(splitted_pdf, self.training["output"], kfold_datasets) + output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask) training = MetaModel(full_model_input_dict, output_tr) - output_vl = _pdf_injection(splitted_pdf, self.validation["output"], kfold_datasets) + # For validation we don't have integrability + n_val = len(self.validation["output"]) + output_vl = _pdf_injection(splitted_pdf[:n_val], self.validation["output"], validation_mask) validation = MetaModel(full_model_input_dict, output_vl) - output_ex = _pdf_injection(splitted_pdf, self.experimental["output"], negate_k_datasets) + # And for experimental we don't have positivity + # TODO: risky to rely that much in the order + n_exps = len(self.experimental["output"]) + output_ex = _pdf_injection( + splitted_pdf[:n_exps], self.experimental["output"], experimental_mask + ) experimental = MetaModel(full_model_input_dict, output_ex) @@ -463,7 +399,7 @@ def _reset_observables(self): """ self.input_list = [] self.input_sizes = [] - for key in ["output", "losses", "posmultipliers", "integmultipliers"]: + for key in ["output", "posmultipliers", "integmultipliers"]: self.training[key] = [] self.validation[key] = [] self.experimental[key] = [] @@ -519,10 +455,6 @@ def _generate_observables( self.validation["output"].append(exp_layer["output_vl"]) self.experimental["output"].append(exp_layer["output"]) - self.training["losses"].append(exp_layer["loss_tr"]) - self.validation["losses"].append(exp_layer["loss_vl"]) - self.experimental["losses"].append(exp_layer["loss"]) - # Generate the positivity penalty for pos_dict in self.pos_info: if not self.mode_hyperopt: @@ -542,9 +474,7 @@ def _generate_observables( # The positivity should be on both training and validation models self.training["output"].append(pos_layer["output_tr"]) - self.training["losses"].append(pos_layer["loss_tr"]) self.validation["output"].append(pos_layer["output_tr"]) - self.validation["losses"].append(pos_layer["loss_tr"]) self.training["posmultipliers"].append(pos_multiplier) self.training["posinitials"].append(pos_initial) @@ -571,7 +501,6 @@ def _generate_observables( # The integrability all falls to the training self.training["output"].append(integ_layer["output_tr"]) - self.training["losses"].append(integ_layer["loss_tr"]) self.training["integmultipliers"].append(integ_multiplier) self.training["integinitials"].append(integ_initial) @@ -634,29 +563,10 @@ def _generate_pdf( regularizer=regularizer, regularizer_args=regularizer_args, impose_sumrule=self.impose_sumrule, - parallel_models=self.parallel_models + parallel_models=self.parallel_models, ) return pdf_models - def _assign_data(self, models, fold_k=0): - """Assign to each model the data to compare with as well as the - number of data points in the model. - In the most general case training and validation get assigned the replic'd data - while experimental gets the actual data. - In the kfolding case (i.e, partition != None), they all receive the same data - but the folded data is set to 0 for training and validation - """ - training = _assign_data_to_model(models["training"], self.training, fold_k) - validation = _assign_data_to_model(models["validation"], self.validation, fold_k) - experimental = _assign_data_to_model(models["experimental"], self.experimental, fold_k) - - ret = { - "training": training, - "validation": validation, - "experimental": experimental, - } - return ret - def _prepare_reporting(self, partition): """Parses the information received by the :py:class:`n3fit.ModelTrainer.ModelTrainer` to select the bits necessary for reporting the chi2. @@ -706,9 +616,11 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100): callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) - # TODO: this needs to be changed for hyperopt - return self.pass_status -# return self.failed_status + # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means + # for now consider the run as good if any replica passed + if any([bool(i) for i in stopping_object.e_best_chi2]): + return self.pass_status + return self.failed_status def _hyperopt_override(self, params): """ Unrolls complicated hyperopt structures into very simple dictionaries""" @@ -719,8 +631,8 @@ def _hyperopt_override(self, params): for key, value in item.items(): params[key] = value - def enable_tensorboard(self, logdir, weight_freq = 0, profiling=False): - """ Enables tensorboard callback for further runs of the fitting procedure + def enable_tensorboard(self, logdir, weight_freq=0, profiling=False): + """Enables tensorboard callback for further runs of the fitting procedure Parameters ---------- @@ -731,7 +643,9 @@ def enable_tensorboard(self, logdir, weight_freq = 0, profiling=False): profiling: bool flag to enable the tensorboard profiler """ - callback_tb = callbacks.gen_tensorboard_callback(logdir, profiling=profiling, histogram_freq=weight_freq) + callback_tb = callbacks.gen_tensorboard_callback( + logdir, profiling=profiling, histogram_freq=weight_freq + ) self.callbacks.append(callback_tb) def evaluate(self, stopping_object): @@ -749,15 +663,13 @@ def evaluate(self, stopping_object): val_chi2 : chi2 of the validation set exp_chi2: chi2 of the experimental data (without replica or tr/vl split) """ - if self.model_dicts is None: + if self.training["model"] is None: raise RuntimeError("Modeltrainer.evaluate was called before any training") # Needs to receive a `stopping_object` in order to select the part of the # training and the validation which are actually `chi2` and not part of the penalty - training = self.model_dicts["training"] - train_chi2 = stopping_object.evaluate_training(training["model"]) + train_chi2 = stopping_object.evaluate_training(self.training["model"]) val_chi2 = stopping_object.vl_chi2 - experimental = self.model_dicts["experimental"] - exp_chi2 = experimental["model"].compute_losses()["loss"] / experimental["ndata"] + exp_chi2 = self.experimental["model"].compute_losses()["loss"] / self.experimental["ndata"] return train_chi2, val_chi2, exp_chi2 def hyperparametrizable(self, params): @@ -835,7 +747,7 @@ def hyperparametrizable(self, params): # Model generation joins all the different observable layers # together with pdf model generated above - models = self._model_generation(pdf_models, partition) + models = self._model_generation(pdf_models, partition, k) # Only after model generation, apply possible weight file # TODO: not sure whether it is a good idea that all of them start at the same point @@ -850,17 +762,12 @@ def hyperparametrizable(self, params): initial_values = self.training["posinitials"] + self.training["posinitials"] models["training"].reset_layer_weights_to(pos_and_int, initial_values) - # Assign data to each model - # model dicts is similar to model but includes information about - # the target data and number of points - model_dicts = self._assign_data(models, k) - # Generate the list containing reporting info necessary for chi2 reporting = self._prepare_reporting(partition) if self.no_validation: # Substitute the validation model with the training model - model_dicts["validation"] = model_dicts["training"] + models["validation"] = models["training"] validation_model = models["training"] else: validation_model = models["validation"] @@ -874,11 +781,12 @@ def hyperparametrizable(self, params): total_epochs=epochs, stopping_patience=stopping_epochs, threshold_positivity=threshold_pos, - threshold_chi2=threshold_chi2 + threshold_chi2=threshold_chi2, ) # Compile each of the models with the right parameters - _model_compilation(model_dicts, params["optimizer"]) + for model in models.values(): + model.compile(**params["optimizer"]) passed = self._train_and_fit( models["training"], @@ -886,14 +794,18 @@ def hyperparametrizable(self, params): epochs=epochs, ) - # Save validation chi2 - validation_loss = stopping_object.vl_chi2 + if self.mode_hyperopt: + # TODO: currently only working for one single replica + # If doing a hyperparameter scan we need to keep track at this point of the loss function + validation_loss = stopping_object.vl_chi2 - # Compute experimental loss - exp_loss_raw = np.take(models["experimental"].compute_losses()["loss"], -1) - experimental_loss = exp_loss_raw / model_dicts["experimental"]["ndata"] + # Compute experimental loss + exp_loss_raw = np.take(models["experimental"].compute_losses()["loss"], -1) + # And divide by the number of active points in this fold + # it would be nice to have a ndata_per_fold variable coming in the vp object... + ndata = np.sum([np.count_nonzero(i[k]) for i in self.experimental["folds"]]) + experimental_loss = exp_loss_raw / ndata - if self.mode_hyperopt: hyper_loss = experimental_loss if passed != self.pass_status: log.info("Hyperparameter combination fail to find a good fit, breaking") @@ -902,46 +814,45 @@ def hyperparametrizable(self, params): for penalty in self.hyper_penalties: hyper_loss += penalty(pdf_model, stopping_object) l_hyper.append(hyper_loss) - log.info("Fold %d finished, loss=%.1f, pass=%s", k+1, hyper_loss, passed) + log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) if hyper_loss > self.hyper_threshold: - log.info("Loss above threshold (%.1f > %.1f), breaking", hyper_loss, self.hyper_threshold) + log.info( + "Loss above threshold (%.1f > %.1f), breaking", + hyper_loss, + self.hyper_threshold, + ) # Apply a penalty proportional to the number of folds that have not been computed pen_mul = len(self.kpartitions) - k l_hyper = [i * pen_mul for i in l_hyper] break - # Save all losses - l_valid.append(validation_loss) - l_exper.append(experimental_loss) - - dict_out = { - "status": passed, - "validation_loss": np.average(l_valid), - "experimental_loss": np.average(l_exper), - } + # Save all losses + l_valid.append(validation_loss) + l_exper.append(experimental_loss) if self.mode_hyperopt: - dict_out["loss"] = self.hyper_loss(l_hyper) - dict_out["kfold_meta"] = { - "validation_losses": l_valid, - "experimental_losses": l_exper, - "hyper_losses": l_hyper, + # Hyperopt needs a dictionary with information about the losses + # it is possible to store arbitrary information in the trial file by adding it to this dictionary + dict_out = { + "status": passed, + "loss": self.hyper_loss(l_hyper), + "validation_loss": np.average(l_valid), + "experimental_loss": np.average(l_exper), + "kfold_meta": { + "validation_losses": l_valid, + "experimental_losses": l_exper, + "hyper_losses": l_hyper, + }, } - # If we are using hyperopt we don't need to output any other information return dict_out - dict_out["loss"] = experimental_loss - - # Add to the output dictionary things that are needed by performfit.py - # to generate the output pdf, check the arc-length, gather stats, etc - # some of them are already attributes of the class so they are redundant here - # but I think it's good to present them explicitly - dict_out["stopping_object"] = stopping_object - dict_out["experimental"] = self.experimental - dict_out["training"] = self.training - dict_out["pdf_models"] = pdf_models - - # Only after the training has finished, we save all models for future reporting - self.model_dicts = model_dicts + # Keep a reference to the models after training for future reporting + self.training["model"] = models["training"] + self.experimental["model"] = models["experimental"] + self.validation["model"] = models["validation"] + # In a normal run, the only information we need to output is the stopping object + # (which contains metadata about the stopping) + # and the pdf models (which are used to generate the PDF grids and compute arclengths) + dict_out = {"status": passed, "stopping_object": stopping_object, "pdf_models": pdf_models} return dict_out diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 99f4a640db..23b3125cd5 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -10,14 +10,7 @@ from tensorflow.keras.models import Model from tensorflow.keras import optimizers as Kopt from tensorflow.python.keras.utils import tf_utils -from n3fit.backends.keras_backend.operations import numpy_to_tensor - -# Check the TF version to check if legacy-mode is needed (TF < 2.2) -tf_version = tf.__version__.split('.') -if int(tf_version[0]) == 2 and int(tf_version[1]) < 2: - LEGACY = True -else: - LEGACY = False +from n3fit.backends.keras_backend import operations as op # Define in this dictionary new optimizers as well as the arguments they accept # (with default values if needed be) @@ -36,6 +29,12 @@ v[1]["clipnorm"] = 1.0 +def _default_loss(y_true, y_pred): + """ Default loss to be used when the model is compiled with loss = Null + (for instance if the prediction of the model is already the loss """ + return op.sum(y_pred) + + def _fill_placeholders(original_input, new_input=None): """ Fills the placeholders of the original input with a new set of input @@ -114,7 +113,7 @@ def __init__(self, input_tensors, output_tensors, **kwargs): # otherwise, put a placeholder None as it will come from the outside name = input_tensor.name.rsplit(":",1)[0] try: - self.x_in[name] = numpy_to_tensor(input_tensor.tensor_content) + self.x_in[name] = op.numpy_to_tensor(input_tensor.tensor_content) self.tensors_in[name] = input_tensor except AttributeError: self.x_in[name] = None @@ -123,7 +122,6 @@ def __init__(self, input_tensors, output_tensors, **kwargs): self.all_inputs = input_list self.all_outputs = output_list self.target_tensors = None - self.eval_fun = None self.compute_losses_function = None def _parse_input(self, extra_input=None, pass_content=True): @@ -159,7 +157,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): x = self._parse_input(self.x_in) if y is None: y = self.target_tensors - history = super().fit(x=x, y=y, epochs=epochs, **kwargs,) + history = self.fit(x=x, y=y, epochs=epochs, **kwargs) loss_dict = history.history return loss_dict @@ -176,7 +174,7 @@ def compute_losses(self): The losses reported in the ``evaluate`` method for n3fit are, however, summed over replicas. Instead the loss we are interested in is usually the output of the model (i.e., predict) - This function then generates a dictionary of partial losses of the model separated per replica. + This function then generates a dict of partial losses of the model separated per replica. i.e., the output for experiment {'LHC_exp'} will be an array of Nrep elements. Returns @@ -184,8 +182,9 @@ def compute_losses(self): dict a dictionary with all partial losses of the model """ - # TODO might not work for TF < 2.2, we might not care either + # TODO might not work for TF < 2.2 if self.compute_losses_function is None: + # If it is the first time we are passing through, compile the function and save it out_names = [f"{i}_loss" for i in self.output_names] out_names.insert(0, "loss") @@ -203,8 +202,11 @@ def losses_fun(): self.compute_losses_function = losses_fun ret = self.compute_losses_function() - # undocumented TF function that converts all the tensors from the ret dictionary to numpy arrays - # if it dissapears, equivalent to {k: i.numpy() for k, i in ret.items()} + + # The output of this function is to be used by python (and numpy) so we need to convert + # the tensorflow variable to python primitives or numpy arrays. + # Undocumented TF function that converts all the tensors from the ret dictionary to numpy arrays + # if it dissapears, equivalent for us to {k: i.numpy() for k, i in ret.items()} return tf_utils.to_numpy_or_python_type(ret) def compile( @@ -248,6 +250,9 @@ def compile( f"[MetaModel.select_initializer] optimizer not implemented: {optimizer_name}" ) from e + if loss is None: + loss = _default_loss + opt_function = opt_tuple[0] opt_args = opt_tuple[1] @@ -261,69 +266,16 @@ def compile( # Instantiate the optimizer opt = opt_function(**opt_args) - # If given target output, compile it together with the model for better performance - if target_output is not None: + # If given target output is None, target_output is unnecesary, save just a zero per output + if target_output is None: + self.target_tensors = [np.zeros((1,1)) for i in self.output_shape] + else: if not isinstance(target_output, list): target_output = [target_output] - # Tensorize self.target_tensors = target_output - # Reset the evaluation function (if any) - self.eval_fun = None - super(MetaModel, self).compile(optimizer=opt, loss=loss) - def make_test_function(self): - """ - If the model has been compiled in the normal NNPDF way, - then the output of the prediction is already the loss, so we skip this part - by just summing over predictions. - - Otherwise, just return the usual TF make_test_function - """ - - if self.eval_fun is not None: - return self.eval_fun - - if self.target_tensors is None: - return super().make_test_function() - - @tf.function - def eval_fun(*args): - predictions = self(self._parse_input(None)) - return tf.reduce_sum(predictions) - - self.eval_fun = eval_fun - return eval_fun - - # Recover the target tensors and their lengths, we cannot rely - # directly on the output from the model as we might have target_tensors - # with 0 data points (if the tr/vl mask covers the whole set) - lens = [] - tt = [] - for target in self.target_tensors: - lens.append(target.size) - tt.append(numpy_to_tensor(target)) - # Save target_tensors as tensors, as it might be useful for LEGACY - self.target_tensors = tt - - # Get the name of the output layer - # and add the suffix _loss to match TF behaviour - out_names = [f"{i}_loss" for i in self.output_names] - out_names.insert(0, "loss") - - @tf.function - def eval_fun(*args): - predictions = self(self._parse_input(None)) - loss_list = [lfun(target, pred) for target, pred, lfun in zip(tt, predictions, self.loss)] - ret = [tf.reduce_sum(loss_list)] + loss_list - return dict(zip(out_names, ret)) - - # Save the function so we don't go through this again - self.eval_fun = eval_fun - - return eval_fun - def set_masks_to(self, names, val=0.0): """ Set all mask value to the selected value Masks in MetaModel should be named {name}_mask diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 35f6a632fb..3e2308e19c 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -266,7 +266,6 @@ def pdf_masked_convolution(raw_pdf, basis_mask): return pdf_x_pdf - def tensor_product(*args, **kwargs): """ Computes the tensordot product between tensor_x and tensor_y @@ -283,7 +282,6 @@ def einsum(equation, *args, **kwargs): return tf.einsum(equation, *args, **kwargs) - @tf.function(experimental_relax_shapes=True) def op_log(o_tensor, **kwargs): """ @@ -321,7 +319,7 @@ def scatter_to_one(values, indices=[[1]], output_dim=14): @tf.function def backend_function(fun_name, *args, **kwargs): """ - Calls the (``fun_name``) backend function + Wrapper to call non-explicitly implemented backend functions by name: (``fun_name``) see full `docs `_ for some possibilities """ fun = getattr(K, fun_name) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 31fdae7929..69c5c43290 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -355,7 +355,7 @@ def check_consistent_basis(fitting, theoryid): @make_argcheck -def can_run_in_parallel(fitting, replica, parallel_models=1): +def can_run_in_parallel(fitting, replica, hyperopt, parallel_models=1): """ Checks whether a runcard which is trying to run several replicas at once (parallel_models =/= 1) is valid """ rp = len(replica) @@ -364,6 +364,8 @@ def can_run_in_parallel(fitting, replica, parallel_models=1): raise CheckError("Can't run more than one replica at once if no replicas are to be generated") if parallel_models == 1: return + if hyperopt: + raise CheckError("Running replicas in parallel with hyperopt is still not supported") if genrep: raise CheckError("Replica generation is not supported yet for parallel models") if fitting["parameters"].get("layer_type") != "dense": diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 22f2ecd530..1b28a01de8 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -110,7 +110,7 @@ def default(self, o): return super().default(o) -def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, epoch_stop, timing): +def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, stop_epoch, timing): """Generates a dictionary containing all relevant metadata for the fit Parameters @@ -126,6 +126,8 @@ def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, epoch_stop, chi2 for the validation true_chi2: float chi2 for the exp (unreplica'd data) + epoch_stop: int + epoch at which the stopping stopped (not the one for the best fit!) timing: dict dictionary of the timing of the different events that happened """ @@ -133,7 +135,7 @@ def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, epoch_stop, # Generate preprocessing information all_info["preprocessing"] = pdf_object.get_preprocessing_factors() # .fitinfo-like info - all_info["stop_epoch"] = epoch_stop + all_info["stop_epoch"] = stop_epoch all_info["best_epoch"] = replica_status.best_epoch all_info["erf_tr"] = tr_chi2 all_info["erf_vl"] = vl_chi2 diff --git a/n3fit/src/n3fit/layers/Rotations.py b/n3fit/src/n3fit/layers/Rotations.py index 0ccca32c0b..c5f08a1a49 100644 --- a/n3fit/src/n3fit/layers/Rotations.py +++ b/n3fit/src/n3fit/layers/Rotations.py @@ -66,7 +66,7 @@ class FkRotation(MetaLayer): # i.e., create the matrix and inherit from the Rotation layer above def __init__(self, output_dim=14, name="evolution", **kwargs): self.output_dim = output_dim - super().__init__(name, **kwargs) + super().__init__(name=name, **kwargs) def call(self, pdf_raw): # Transpose the PDF so that the flavour index is the first one diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py index 2d89c387c9..78b7f3e0ea 100644 --- a/n3fit/src/n3fit/layers/losses.py +++ b/n3fit/src/n3fit/layers/losses.py @@ -1,7 +1,13 @@ """ Module containg the losses to be apply to the models as layers -""" + The layer take the input from the model and acts on it producing a score function. + For instance, in the case of the chi2 (``LossInvcovmat``) the function takes only + the prediction of the model and, during instantiation, took the real data to compare with + and the covmat. + +""" +import numpy as np from n3fit.backends import MetaLayer from n3fit.backends import operations as op @@ -10,20 +16,57 @@ class LossInvcovmat(MetaLayer): """ Loss function such that: L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} + + Takes as argument the inverse of the covmat and the target data. + It also takes an optional argument to mask part of the predictions + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> C = np.random.rand(5,5) + >>> data = np.random.rand(1, 1, 5) + >>> pred = np.random.rand(1, 1, 5) + >>> invC = np.linalg.inv( C @ C.T) + >>> loss_f = losses.LossInvcovmat(invC, data) + >>> loss_f(pred).shape == 1 + True """ - def __init__(self, invcovmat, y_true, **kwargs): + def __init__(self, invcovmat, y_true, mask=None, **kwargs): + # If we have a diagonal matrix, padd with 0s and hope it's not too heavy on memory + if len(invcovmat.shape) == 1: + invcovmat = np.diag(invcovmat) self.invcovmat = op.numpy_to_tensor(invcovmat) self.y_true = op.numpy_to_tensor(y_true) + if mask is None or all(mask): + self.mask = None + else: + mask = np.array(mask, dtype=np.float32).reshape(1,1,-1) + self.mask = op.numpy_to_tensor(mask) super().__init__(**kwargs) def call(self, y_pred, **kwargs): tmp = self.y_true - y_pred + if self.mask is not None: + tmp = op.op_multiply([tmp, self.mask]) res = op.einsum("bri, ij, brj -> r", tmp, self.invcovmat, tmp) return res class LossLagrange(MetaLayer): + """ + Abstract loss function to apply lagrange multipliers to a model. + + L = \lambda * f(y) + + The form of f(y) is given by modifying the ``apply_loss`` method. + It is possible to modify how the multiplication of the lambda factor is implemented + by modifying the ``apply_multiplier`` method. + + The (non trainable) weight containing the multiplier is named ``lagMult``. + """ + def __init__(self, c=1.0, **kwargs): self._initial_multiplier = c super().__init__(**kwargs) @@ -46,7 +89,7 @@ def call(self, y_pred, **kwargs): class LossPositivity(LossLagrange): """ - Returns L = elu(y_pred) (considers y_true as 0) + Returns L = \lambda*elu(y_pred) The positivity loss is computed by inverting the sign of the datapoints and then applying the elu function, this function is @@ -55,6 +98,19 @@ class LossPositivity(LossLagrange): This is done to avoid a big discontinuity in the derivative at 0 when the lagrange multiplier is very big. In practice this function can produce results in the range (-alpha, inf) + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> pred = np.random.rand(1, 1, 5) + >>> alpha = 1e-7 + >>> c = 1e8 + >>> loss_f = losses.LossPositivity(c=c, alpha=alpha) + >>> loss_f(pred) == -5*alpha + True + >>> loss_f(-pred) > c + True """ def __init__(self, alpha=1e-7, **kwargs): @@ -70,6 +126,15 @@ def apply_loss(self, y_pred): class LossIntegrability(LossLagrange): """ Returns L = (y_pred)*(y_pred) + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> pred = np.random.rand(1, 1, 5) + >>> loss_f = losses.LossIntegrability(c=1e2) + >>> loss_f(pred) > 0 + True """ def apply_loss(self, y_pred): diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 38468ac130..4484b42472 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -134,11 +134,6 @@ def observable_generator( def gen_concat(): return operations.as_layer(operations.concatenate, op_kwargs={"axis": 2}) - # Tensorflow operations have ugly name, - # we want the final observables to be named just {spec_name} (with'val/exp' if needed) - tr_name = spec_name - vl_name = f"{spec_name}_val" - ex_name = f"{spec_name}_exp" concat_ex = gen_concat() # For data transformation all concatenations are the same @@ -167,12 +162,7 @@ def experiment_layer( else: split_pdf = [pdf] # every obs gets its share of the split - for partial_pdf, obs in zip(split_pdf, model_obs): - obs_output = obs(partial_pdf) - if datasets_out and obs.name[4:] in datasets_out: - mask_out = Mask(c=0.0, name=f"zero_{obs.name}") - obs_output = mask_out(obs_output) - output_layers.append(obs_output) + output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, model_obs)] # Concatenate all datasets as experiments are one single entity if needed ret = concat(output_layers) if rotation is not None: @@ -182,77 +172,71 @@ def experiment_layer( # Now create the model for this experiment full_nx = sum(dataset_xsizes) - def loss(y_true, y_pred): - return operations.sum(y_pred) - if spec_dict["positivity"]: if integrability: loss_pos = losses.LossIntegrability(name=spec_name, c=positivity_initial) else: loss_pos = losses.LossPositivity(name=spec_name, c=positivity_initial) - def out_positivity(pdf_layer, datasets_out=None): + def out_positivity(pdf_layer, mask=None, datasets_out=None): exp_result = experiment_layer(pdf_layer) return loss_pos(exp_result) layer_info = { "inputs": model_inputs, "output_tr": out_positivity, - "loss_tr": loss, "experiment_xsize": full_nx, } return layer_info - invcovmat_tr = spec_dict["invcovmat"] - invcovmat_vl = spec_dict["invcovmat_vl"] - invcovmat = spec_dict["invcovmat_true"] - - # Prepare the loss function - loss_tr = losses.LossInvcovmat(invcovmat_tr, spec_dict["expdata"], name=tr_name) - loss_vl = losses.LossInvcovmat(invcovmat_vl, spec_dict["expdata_vl"], name=vl_name) - loss_ex = losses.LossInvcovmat(invcovmat, spec_dict["expdata_true"], name=ex_name) + # Prepare the loss function, important! the loss function must carry the name of the experiment! # Generate the loss function and rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: # TODO: I'm asuming that the diagonal covmat will work ootb, check # The rotation is the last layer so it should carry The Name - obsrot_tr = ObsRotation(spec_dict.get("data_transformation"), name=tr_name) - obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl"), name=vl_name) + obsrot_tr = ObsRotation(spec_dict.get("data_transformation")) + obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl")) else: obsrot_tr = None obsrot_vl = None - def out_tr(pdf_layer, datasets_out=None): + + # Prepare the inverse covmats for each of the loss functions + # (that are only instantiated when the output layer is created) + invcovmat_tr = spec_dict["invcovmat"] + invcovmat_vl = spec_dict["invcovmat_vl"] + invcovmat = spec_dict["invcovmat_true"] + + def out_tr(pdf_layer, mask=None, datasets_out=None): + loss_tr = losses.LossInvcovmat(invcovmat_tr, spec_dict["expdata"], mask, name=spec_name) exp_result = experiment_layer( pdf_layer, model_obs=model_obs_tr, concat=concat_tr, - datasets_out=datasets_out, rotation=obsrot_tr, ) return loss_tr(exp_result) - def out_vl(pdf_layer, datasets_out=None): + def out_vl(pdf_layer, mask=None, datasets_out=None): + loss_vl = losses.LossInvcovmat(invcovmat_vl, spec_dict["expdata_vl"], mask, name=f"{spec_name}_val") exp_result = experiment_layer( pdf_layer, model_obs=model_obs_vl, concat=concat_vl, - datasets_out=datasets_out, rotation=obsrot_vl, ) return loss_vl(exp_result) - def out_exp(pdf_layer, datasets_out=None): + def out_exp(pdf_layer, mask=None, datasets_out=None): + loss_ex = losses.LossInvcovmat(invcovmat, spec_dict["expdata_true"], mask, name=f"{spec_name}_exp") return loss_ex(experiment_layer(pdf_layer)) layer_info = { "inputs": model_inputs, "output": out_exp, - "loss": loss, "output_tr": out_tr, - "loss_tr": loss, "output_vl": out_vl, - "loss_vl": loss, "experiment_xsize": full_nx, } diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index d009e26054..11a19b1bfc 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -39,11 +39,16 @@ def gen_integration_input(nx): def msr_impose(nx=int(2e3), basis_size=8, mode='All'): """ - This function receives: - - fit_layer: the 8-basis layer of PDF which we fit - - final_layer: the 14-basis which is fed to the fktable - It uses pdf_fit to compute the sum rule and returns a modified version of - the final_pdf layer with a normalisation by which the sum rule is imposed + Generates a function that applies a normalization layer to the fit. + The normalization is computed from the direct output of the NN (so the 7,8-flavours basis) + and it is applied to the input of the fktable (i.e., to the 14-flavours fk-basis). + + Parameters + ---------- + nx: int + number of points for the integration grid + basis_size: int + number of flavours output of the NN """ # 1. Generate the fake input which will be used to integrate xgrid, weights_array = gen_integration_input(nx) @@ -61,7 +66,7 @@ def msr_impose(nx=int(2e3), basis_size=8, mode='All'): # 5. Make the xgrid array into a backend input layer so it can be given to the normalization xgrid_input = operations.numpy_to_input(xgrid) - # Now parepare a function that takes as input the 8-flavours output of the NN + # Now prepare a function that takes as input the 8-flavours output of the NN # and the 14-flavours after the fk rotation and returns a 14-flavours normalized output # note + TODO: # the idea was that the normalization should always be applied at the fktable 14-flavours diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index ec59f9e385..995d5bb6af 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -143,6 +143,8 @@ def performfit( activate some debug options maxcores: int maximum number of (logical) cores that the backend should be aware of + parallel_models: int + number of models to be run in parallel """ from n3fit.backends import set_initial_state @@ -303,9 +305,6 @@ def performfit( # After the fit is run we get a 'result' dictionary with the following items: stopping_object = result["stopping_object"] pdf_models = result["pdf_models"] - true_chi2 = result["loss"] - training = result["training"] - log.info("Total exp chi2: %s", true_chi2) # Where has the stopping point happened (this is only for debugging purposes) print( From 96bf3b50237fc295b810799ce34fc2db1f8bdf76 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 5 Jan 2021 20:53:51 +0100 Subject: [PATCH 20/27] abstract out the generation of the experimental layers --- n3fit/src/n3fit/ModelTrainer.py | 2 +- .../src/n3fit/hyper_optimization/penalties.py | 4 +- n3fit/src/n3fit/model_gen.py | 173 ++++++++++-------- 3 files changed, 97 insertions(+), 82 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index 755011bbfb..e8dc93084d 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -812,7 +812,7 @@ def hyperparametrizable(self, params): # If the fit failed to fit, no need to add a penalty to the loss break for penalty in self.hyper_penalties: - hyper_loss += penalty(pdf_model, stopping_object) + hyper_loss += penalty(pdf_models[0], stopping_object) l_hyper.append(hyper_loss) log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) if hyper_loss > self.hyper_threshold: diff --git a/n3fit/src/n3fit/hyper_optimization/penalties.py b/n3fit/src/n3fit/hyper_optimization/penalties.py index 7d3ef24708..b6bad1c8f3 100644 --- a/n3fit/src/n3fit/hyper_optimization/penalties.py +++ b/n3fit/src/n3fit/hyper_optimization/penalties.py @@ -83,11 +83,11 @@ def patience(pdf_model, stopping_object, alpha=1e-4): 3.434143467595683 """ - epoch_best = stopping_object.e_best_chi2 + epoch_best = np.take(stopping_object.e_best_chi2, 0) patience = stopping_object.stopping_patience max_epochs = stopping_object.total_epochs diff = abs(max_epochs - patience - epoch_best) - vl_loss = stopping_object.vl_chi2 + vl_loss = np.take(stopping_object.vl_chi2, 0) return vl_loss * np.exp(alpha * diff) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 4484b42472..00430e8a2f 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -7,6 +7,9 @@ # pdfNN_layer_generator: Generates the PDF NN layer to be fitted """ +from dataclasses import dataclass +import numpy as np + import n3fit.msr as msr_constraints from n3fit.layers import DIS, DY, Mask, ObsRotation, losses from n3fit.layers import Preprocessing, FkRotation, FlavourToEvolution @@ -17,6 +20,61 @@ from n3fit.backends import base_layer_selector, regularizer_selector +@dataclass +class ObservableWrapper: + """Wrapper to generate the observable layer once the PDF model is prepared + It can take normal datasets or lagrande-multiplier-like datasets + (such as positivity or integrability) + """ + + name: str + observables: list + dataset_xsizes: list + invcovmat: np.array = None + multiplier: float = 1.0 + integrability: bool = False + positivity: bool = False + data: np.array = None + rotation: ObsRotation = None # only used for diagonal covmat + + def _generate_loss(self, mask=None): + """Generates the corresponding loss function depending on the values the wrapper + was initialized with""" + if self.invcovmat is not None: + loss = losses.LossInvcovmat(self.invcovmat, self.data, mask, name=self.name) + elif self.positivity: + loss = losses.LossPositivity(name=self.name, c=self.multiplier) + elif self.integrability: + loss = losses.LossIntegrability(name=self.name, c=self.multiplier) + return loss + + def _generate_experimental_layer(self, pdf): + """ Generates the experimental layer from the PDF """ + # First split the layer into the different datasets (if needed!) + if len(self.dataset_xsizes) > 1: + splitting_layer = operations.as_layer( + operations.split, + op_args=[self.dataset_xsizes], + op_kwargs={"axis": 1}, + name=f"{self.name}_split", + ) + split_pdf = splitting_layer(pdf) + else: + split_pdf = [pdf] + # Every obs gets its share of the split + output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, self.observables)] + # Concatenate all datasets (so that experiments are one single entity) + ret = operations.concatenate(output_layers, axis=2) + if self.rotation is not None: + ret = self.rotation(ret) + return ret + + def __call__(self, pdf_layer, mask=None): + loss_f = self._generate_loss(mask) + experiment_prediction = self._generate_experimental_layer(pdf_layer) + return loss_f(experiment_prediction) + + def observable_generator( spec_dict, positivity_initial=1.0, integrability=False ): # pylint: disable=too-many-locals @@ -93,10 +151,7 @@ def observable_generator( operation_name, name=f"dat_{dataset_name}", ) - if spec_dict["positivity"]: - obs_layer_ex = obs_layer_tr - obs_layer_vl = obs_layer_tr - else: + if not spec_dict["positivity"]: obs_layer_ex = Obs_Layer( dataset_dict["fktables"], dataset_dict["ex_fktables"], @@ -109,6 +164,8 @@ def observable_generator( operation_name, name=f"val_{dataset_name}", ) + else: + obs_layer_ex = obs_layer_vl = None # Data transformation might need access to the full array of output data # therefore the validation and training layers should point to the full exp @@ -130,57 +187,17 @@ def observable_generator( model_obs_vl.append(obs_layer_vl) model_obs_ex.append(obs_layer_ex) - # Prepare a concatenation as experiments are one single entity formed by many datasets - def gen_concat(): - return operations.as_layer(operations.concatenate, op_kwargs={"axis": 2}) - - - concat_ex = gen_concat() - # For data transformation all concatenations are the same - if spec_dict.get("data_transformation") is None: - concat_tr = gen_concat() - concat_vl = gen_concat() - else: - concat_tr = concat_ex - concat_vl = concat_ex - - # creating the experiment as a model turns out to bad for performance - def experiment_layer( - pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=None, datasets_out=None - ): - """ By default works with the experiment observable """ - output_layers = [] - # First split the pdf layer into the different datasets if needed - if len(dataset_xsizes) > 1: - splitting_layer = operations.as_layer( - operations.split, - op_args=[dataset_xsizes], - op_kwargs={"axis": 1}, - name=f"{spec_name}_split", - ) - split_pdf = splitting_layer(pdf) - else: - split_pdf = [pdf] - # every obs gets its share of the split - output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, model_obs)] - # Concatenate all datasets as experiments are one single entity if needed - ret = concat(output_layers) - if rotation is not None: - ret = rotation(ret) - return ret - - # Now create the model for this experiment full_nx = sum(dataset_xsizes) if spec_dict["positivity"]: - if integrability: - loss_pos = losses.LossIntegrability(name=spec_name, c=positivity_initial) - else: - loss_pos = losses.LossPositivity(name=spec_name, c=positivity_initial) - - def out_positivity(pdf_layer, mask=None, datasets_out=None): - exp_result = experiment_layer(pdf_layer) - return loss_pos(exp_result) + out_positivity = ObservableWrapper( + spec_name, + model_obs_tr, + dataset_xsizes, + multiplier=positivity_initial, + positivity=not integrability, + integrability=integrability, + ) layer_info = { "inputs": model_inputs, @@ -189,8 +206,6 @@ def out_positivity(pdf_layer, mask=None, datasets_out=None): } return layer_info - # Prepare the loss function, important! the loss function must carry the name of the experiment! - # Generate the loss function and rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: # TODO: I'm asuming that the diagonal covmat will work ootb, check @@ -201,36 +216,36 @@ def out_positivity(pdf_layer, mask=None, datasets_out=None): obsrot_tr = None obsrot_vl = None - - # Prepare the inverse covmats for each of the loss functions + # Prepare the inverse covmats for each of the loss functions # (that are only instantiated when the output layer is created) invcovmat_tr = spec_dict["invcovmat"] invcovmat_vl = spec_dict["invcovmat_vl"] invcovmat = spec_dict["invcovmat_true"] - def out_tr(pdf_layer, mask=None, datasets_out=None): - loss_tr = losses.LossInvcovmat(invcovmat_tr, spec_dict["expdata"], mask, name=spec_name) - exp_result = experiment_layer( - pdf_layer, - model_obs=model_obs_tr, - concat=concat_tr, - rotation=obsrot_tr, - ) - return loss_tr(exp_result) - - def out_vl(pdf_layer, mask=None, datasets_out=None): - loss_vl = losses.LossInvcovmat(invcovmat_vl, spec_dict["expdata_vl"], mask, name=f"{spec_name}_val") - exp_result = experiment_layer( - pdf_layer, - model_obs=model_obs_vl, - concat=concat_vl, - rotation=obsrot_vl, - ) - return loss_vl(exp_result) - - def out_exp(pdf_layer, mask=None, datasets_out=None): - loss_ex = losses.LossInvcovmat(invcovmat, spec_dict["expdata_true"], mask, name=f"{spec_name}_exp") - return loss_ex(experiment_layer(pdf_layer)) + out_tr = ObservableWrapper( + spec_name, + model_obs_tr, + dataset_xsizes, + invcovmat=invcovmat_tr, + data=spec_dict["expdata"], + rotation=obsrot_tr, + ) + out_vl = ObservableWrapper( + f"{spec_name}_val", + model_obs_vl, + dataset_xsizes, + invcovmat=invcovmat_vl, + data=spec_dict["expdata_vl"], + rotation=obsrot_vl, + ) + out_exp = ObservableWrapper( + f"{spec_name}_exp", + model_obs_ex, + dataset_xsizes, + invcovmat=invcovmat, + data=spec_dict["expdata_true"], + rotation=None, + ) layer_info = { "inputs": model_inputs, From 99eea82b94171e52769bf93b1e75af96eb9ef486 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 6 Jan 2021 13:39:57 +0100 Subject: [PATCH 21/27] possible fix for mac --- n3fit/src/n3fit/backends/keras_backend/MetaModel.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 23b3125cd5..de6b855f3d 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -207,7 +207,13 @@ def losses_fun(): # the tensorflow variable to python primitives or numpy arrays. # Undocumented TF function that converts all the tensors from the ret dictionary to numpy arrays # if it dissapears, equivalent for us to {k: i.numpy() for k, i in ret.items()} - return tf_utils.to_numpy_or_python_type(ret) + try: + dict_result = tf_utils.to_numpy_or_python_type(ret) + except AttributeError: + # For TF < 2.2 + dict_result = {k: i.numpy() for k, i in ret.items()} + return dict_result + def compile( self, From 0c33dbcf03c3001aa54e5f5893f18b985aa308c9 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 7 Jan 2021 09:55:05 +0100 Subject: [PATCH 22/27] add some docs --- doc/sphinx/source/n3fit/runcard_detailed.rst | 72 ++++++++++++++------ 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst index f59137c1af..89cf5bc07f 100644 --- a/doc/sphinx/source/n3fit/runcard_detailed.rst +++ b/doc/sphinx/source/n3fit/runcard_detailed.rst @@ -9,8 +9,9 @@ In this section we fine-grain the explanation of the different parameters that e - :ref:`networkarch-label` - :ref:`optimizer-label` - :ref:`positivity-label` -- :ref:`otheroptions-label` - :ref:`tensorboard-label` +- :ref:`parallel-label` +- :ref:`otheroptions-label` .. _preprocessing-label: @@ -206,24 +207,6 @@ Threshold :math:`\chi2` - ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping. -Save and load weights of the model -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: yaml - - fitting: - save: "weights.h5" - load: "weights.h5" - -- ``save``: saves the weights of the PDF model in the selected file in the replica folder. -- ``load``: loads the weights of the PDF model from the selected file. - -Since the weights depend only on the architecture of the Neural Network, -it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments -and load it in a different runcard and continue the training from there. - -While the load file is read as an absolute path, the file to save to will be found -inside the replica folder. .. _tensorboard-label: @@ -258,3 +241,54 @@ Logging details can be visualized in the browser with the following command: Logging details will include the value of the loss for each experiment over time, the values of the weights of the NN, as well as a detailed analysis of the amount of time that TensorFlow spent on each operation. + + +.. _parallel-label: + +Running fits in parallel +------------------------ + +It is possible to run fits in parallel with ``n3fit`` by using the ``parallel_models`` +flag in the runcard (by default the number of ``parallel_models`` is set to 1). +Running in parallel can be quite hard on memory and it is only advantageous when +fitting on a GPU, where one can find a speed up equal to the number of models run +in parallel (each model being a different replica). + +At present it cannot be used together with the ``hyperopt`` module. + + +.. _otheroptions-label: + +Other options +------------- + +Threshold :math:`\chi2` +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + fitting: + parameters: + threshold_chi2: 4.0 + +- ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping. + + +Save and load weights of the model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + fitting: + save: "weights.h5" + load: "weights.h5" + +- ``save``: saves the weights of the PDF model in the selected file in the replica folder. +- ``load``: loads the weights of the PDF model from the selected file. + +Since the weights depend only on the architecture of the Neural Network, +it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments +and load it in a different runcard and continue the training from there. + +While the load file is read as an absolute path, the file to save to will be found +inside the replica folder. From 40d590a79c4a5eb4c7b2e62a3ef4c189fa06cd67 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 26 Jan 2021 11:42:12 +0100 Subject: [PATCH 23/27] fix the problems with the rebasing --- .../n3fit/backends/keras_backend/MetaModel.py | 7 ++++++- n3fit/src/n3fit/io/writer.py | 2 +- n3fit/src/n3fit/msr.py | 3 +-- n3fit/src/n3fit/performfit.py | 5 +---- n3fit/src/n3fit/stopping.py | 12 +----------- n3fit/src/n3fit/vpinterface.py | 16 ++++++++++++++-- 6 files changed, 24 insertions(+), 21 deletions(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index de6b855f3d..9b1692f997 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -4,7 +4,7 @@ Extension of the backend Model class containing some wrappers in order to absorb other backend-dependent calls. """ - +import re import numpy as np import tensorflow as tf from tensorflow.keras.models import Model @@ -334,3 +334,8 @@ def apply_as_layer(self, x): # TF 2.0 seems to fail with ValueError when passing a dictionary as an input y = x.values() return super().__call__(y) + + def get_layer_re(self, regex): + """ Get all layers matching the given regular expression """ + check = lambda x: re.match(regex, x.name) + return list(filter(check, self.layers)) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 1b28a01de8..f8060119bc 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -69,7 +69,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # Check the directory exist, if it doesn't, generate it os.makedirs(replica_path_set, exist_ok=True) - stop_epoch = self.stopping_object.epoch_of_the_stop + stop_epoch = self.stopping_object.stop_epoch # Get the replica status for this object replica_status = self.stopping_object.get_next_replica() diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index 11a19b1bfc..7bd2cd969f 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -79,12 +79,11 @@ def apply_normalization(layer_fitbasis, layer_pdf): layer_fitbasis: output of the NN layer_pdf: output for the fktable """ - pdf_integrand = operations.op_multiply([division_by_x(xgrid_input), layer_fitbasis(xgrid_input)]) normalization = normalizer(integrator(pdf_integrand)) def ultimate_pdf(x): - return operations.op_multiply([layer_pdf(x), normalization]) + return layer_pdf(x)*normalization return ultimate_pdf diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 995d5bb6af..1f642a02db 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -246,11 +246,8 @@ def performfit( kfold_parameters=kfold_parameters, max_cores=maxcores, model_file=fitting.get("load"), -<<<<<<< HEAD - sum_rules=fitting.get("sum_rules", True) -======= + sum_rules=fitting.get("sum_rules", True), parallel_models=parallel_models ->>>>>>> 6e0fc5f2c (fit many models at once) ) # This is just to give a descriptive name to the fit function diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index a0e750e403..45b5990406 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -460,18 +460,8 @@ def vl_chi2(self): @property def e_best_chi2(self): -<<<<<<< HEAD - """ Epoch of the best chi2, if there is no best epoch - return the last epoch""" - be = self.history.best_epoch - if be is None: - return self.stop_epoch - return be - -======= - """ Epoch of the best chi2 """ + """ Epoch of the best chi2, if there is no best epoch, return None""" return self._history.best_epoch ->>>>>>> 5fb6df564 (many changes to stopping, remove deprecated options) @property def stop_epoch(self): diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index dd5077e8c5..b4d4a975b6 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -18,12 +18,14 @@ """ +import logging import numpy as np import numpy.linalg as la from validphys.core import PDF, MCStats from validphys.pdfbases import ALL_FLAVOURS, check_basis from validphys.arclength import integrability_number, arc_lengths +log = logging.getLogger(__name__) # Order of the evolution basis output from n3fit EVOL_LIST = [ "photon", @@ -83,13 +85,23 @@ def get_nn_weights(self): """Outputs all weights of the NN as numpy.ndarrays """ return self.model.get_weights() - def get_preprocessing_factors(self): + def get_preprocessing_factors(self, replica=None): """Loads the preprocessing alpha and beta arrays from the PDF trained model. If a ``fit_basis`` given in the format of ``n3fit`` runcards is given it will be used to generate a new dictionary with the names, the exponent and whether they are trainable otherwise outputs a Nx2 array where [:,0] are alphas and [:,1] betas """ - preprocessing_layer = self.model.get_layer("pdf_prepro") + # If the replica is given, get the requested preprocessing layer + # otherwise, search for any pdf_prepro_X layer within the model + if replica is not None: + preprocessing_layer = self.model.get_layer(f"pdf_prepro_{replica}") + else: + preprocessing_layers = self.model.get_layer_re("pdf_prepro_\d") + if len(preprocessing_layers) != 1: + # We really don't want to fail at this point, but print a warning at least... + log.warning("More than one preprocessing layer found within the model!") + preprocessing_layer = preprocessing_layers[0] + if self.fit_basis is not None: output_dictionaries = [] for d in self.fit_basis: From 117a9119db663fdb11fd9bb21b633ba73089b8eb Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 26 Jan 2021 12:24:24 +0100 Subject: [PATCH 24/27] fix the tests --- n3fit/src/n3fit/io/writer.py | 2 +- n3fit/src/n3fit/stopping.py | 31 ++++++++++++++++++++----------- n3fit/src/n3fit/tests/test_fit.py | 3 +++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index f8060119bc..48dabd5c2a 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -81,7 +81,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): replica_path_set, fitname, self.q2, - self.stopping_object.e_best_chi2, + replica_status.best_epoch, vl_chi2, tr_chi2, true_chi2, diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 45b5990406..94fb7e8dee 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -249,6 +249,7 @@ def __init__(self, pdf_model): self._pdf_model = pdf_model self._weights = None self._best_epoch = None + self._stop_epoch = None self._best_vl_chi2 = INITIAL_CHI2 def positivity_pass(self): @@ -260,8 +261,14 @@ def positivity_pass(self): @property def best_epoch(self): + if self._best_epoch is None: + return self.stop_epoch return self._best_epoch + @property + def stop_epoch(self): + return self._stop_epoch + @property def best_vl(self): return float(self._best_vl_chi2) @@ -284,9 +291,11 @@ def reload(self): if self._weights: self._pdf_model.set_weights(self._weights) - def stop_training(self): - """ Stop training this replica """ - self._pdf_model.trainable = False + def stop_training(self, epoch = None): + """ Stop training this replica if not stopped before """ + if self._pdf_model.trainable: + self._pdf_model.trainable = False + self._stop_epoch = epoch class FitHistory: @@ -376,16 +385,16 @@ def register(self, epoch, training_info, validation_info): self._history.append(fitstate) return fitstate - def stop_training_replica(self, i): - """ Stop training replica i """ - self._replicas[i].stop_training() + def stop_training_replica(self, i, e): + """ Stop training replica i in epoch e""" + self._replicas[i].stop_training(e) def reload(self): - """Reloads the best fit weights into the model - if there are models to be reloaded - A set of weights can be enforced as an optional argument + """Reloads the best fit weights into the model if there are models to be reloaded + Ensure that all replicas have stopped at this point. """ for replica in self._replicas: + replica.stop_training(self.final_epoch) replica.reload() def __next__(self): @@ -460,7 +469,7 @@ def vl_chi2(self): @property def e_best_chi2(self): - """ Epoch of the best chi2, if there is no best epoch, return None""" + """ Epoch of the best chi2, if there is no best epoch, return last""" return self._history.best_epoch @property @@ -551,7 +560,7 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): stop_replicas = self.count & (self.stopping_degree > self.stopping_patience) for i in np.where(stop_replicas)[0]: self.count[i] = 0 - self._history.stop_training_replica(i) + self._history.stop_training_replica(i, epoch) # By using the stopping degree we only stop when none of the replicas are improving anymore if min(self.stopping_degree) > self.stopping_patience: diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index 2c3f509405..b423589ec8 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -180,3 +180,6 @@ def test_weirdbasis(tmp_path, timing=30): # with pytest.raises(sp.TimeoutExpired): with pytest.raises(sp.CalledProcessError): sp.run(f"{EXE} {quickcard} {REPLICA}".split(), cwd=tmp_path, timeout=timing, check=True) + + +# test_performfit_and_timing(pathlib.Path("/tmp/random_path")) From 702a325fd93aae9fd898fe04654cef0a4b40f87b Mon Sep 17 00:00:00 2001 From: Juacrumar Date: Wed, 27 Jan 2021 15:54:18 +0100 Subject: [PATCH 25/27] Update n3fit/src/n3fit/model_gen.py Co-authored-by: Roy Stegeman --- n3fit/src/n3fit/model_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 00430e8a2f..33aa2dffc0 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -23,7 +23,7 @@ @dataclass class ObservableWrapper: """Wrapper to generate the observable layer once the PDF model is prepared - It can take normal datasets or lagrande-multiplier-like datasets + It can take normal datasets or Lagrange-multiplier-like datasets (such as positivity or integrability) """ From 35467800e07c84b7efe80e7e0947a83208d96adf Mon Sep 17 00:00:00 2001 From: Juacrumar Date: Wed, 27 Jan 2021 16:05:39 +0100 Subject: [PATCH 26/27] Update n3fit/src/n3fit/model_gen.py Co-authored-by: Roy Stegeman --- n3fit/src/n3fit/model_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 33aa2dffc0..4554eebdbc 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -450,7 +450,7 @@ def pdfNN_layer_generator( Returns ------- - model_pdf: n3fit.backends.MetaModel + pdf_models: list with a number equal to `parallel_models` of type n3fit.backends.MetaModel a model f(x) = y where x is a tensor (1, xgrid, 1) and y a tensor (1, xgrid, out) """ # Parse the input configuration From b6e8543038f0e638dded2050345a7b474b0f051c Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 28 Jan 2021 11:51:44 +0100 Subject: [PATCH 27/27] apply comments --- n3fit/src/n3fit/ModelTrainer.py | 1 - .../n3fit/backends/keras_backend/MetaModel.py | 2 +- n3fit/src/n3fit/checks.py | 2 +- n3fit/src/n3fit/layers/DIS.py | 8 +++---- n3fit/src/n3fit/model_gen.py | 22 +++++++++---------- n3fit/src/n3fit/msr.py | 10 ++++----- n3fit/src/n3fit/performfit.py | 2 +- n3fit/src/n3fit/tests/test_fit.py | 3 --- 8 files changed, 22 insertions(+), 28 deletions(-) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index e8dc93084d..b5401983e0 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -720,7 +720,6 @@ def hyperparametrizable(self, params): threshold_chi2 = params.get("threshold_chi2", CHI2_THRESHOLD) # Initialize the chi2 dictionaries - l_train = [] l_valid = [] l_exper = [] l_hyper = [] diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 9b1692f997..3bc7862cd1 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -157,7 +157,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): x = self._parse_input(self.x_in) if y is None: y = self.target_tensors - history = self.fit(x=x, y=y, epochs=epochs, **kwargs) + history = super().fit(x=x, y=y, epochs=epochs, **kwargs) loss_dict = history.history return loss_dict diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 69c5c43290..8c99e631c7 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -355,7 +355,7 @@ def check_consistent_basis(fitting, theoryid): @make_argcheck -def can_run_in_parallel(fitting, replica, hyperopt, parallel_models=1): +def can_run_multiple_replicas(fitting, replica, hyperopt, parallel_models=1): """ Checks whether a runcard which is trying to run several replicas at once (parallel_models =/= 1) is valid """ rp = len(replica) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 570eb11456..1782ece969 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -18,8 +18,8 @@ class DIS(Observable): the incoming pdf. The fktable is expected to be rank 3 (ndata, xgrid, flavours) - while the input pdf is also rank 3 where the first dimension is the batch dimension - (1, xgrid, flavours) + while the input pdf is rank 4 where the first dimension is the batch dimension + and the last dimension the number of replicas being fitted (1, xgrid, flavours, replicas) """ def gen_mask(self, basis): @@ -50,12 +50,12 @@ def call(self, pdf): Parameters ---------- pdf: backend tensor - rank 3 tensor (batch_size, xgrid, flavours) + rank 4 tensor (batch_size, xgrid, flavours, replicas) Returns ------- result: backend tensor - rank 1 tensor (batchsize, ndata) + rank 3 tensor (batchsize, replicas, ndata) """ # DIS never needs splitting if self.splitting is not None: diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 4554eebdbc..fa0c541cef 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -15,7 +15,7 @@ from n3fit.layers import Preprocessing, FkRotation, FlavourToEvolution from n3fit.backends import MetaModel, Input -from n3fit.backends import operations +from n3fit.backends import operations as op from n3fit.backends import MetaLayer, Lambda from n3fit.backends import base_layer_selector, regularizer_selector @@ -52,8 +52,8 @@ def _generate_experimental_layer(self, pdf): """ Generates the experimental layer from the PDF """ # First split the layer into the different datasets (if needed!) if len(self.dataset_xsizes) > 1: - splitting_layer = operations.as_layer( - operations.split, + splitting_layer = op.as_layer( + op.split, op_args=[self.dataset_xsizes], op_kwargs={"axis": 1}, name=f"{self.name}_split", @@ -64,7 +64,7 @@ def _generate_experimental_layer(self, pdf): # Every obs gets its share of the split output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, self.observables)] # Concatenate all datasets (so that experiments are one single entity) - ret = operations.concatenate(output_layers, axis=2) + ret = op.concatenate(output_layers, axis=2) if self.rotation is not None: ret = self.rotation(ret) return ret @@ -113,11 +113,9 @@ def observable_generator( a dictionary with: - `inputs`: input layer - `output`: output layer (unmasked) - - `loss` : loss function (unmasked) - `output_tr`: output layer (training) - - `loss_tr` : loss function (training) - `output_vl`: output layer (validation) - - `loss_vl` : loss function (validation) + - `experiment_xsize` : int (size of the output array) """ spec_name = spec_dict["name"] dataset_xsizes = [] @@ -484,7 +482,7 @@ def pdfNN_layer_generator( # If the input is of type (x, logx) # create a x --> (x, logx) layer to preppend to everything if inp == 2: - add_log = Lambda(lambda x: operations.concatenate([x, operations.op_log(x)], axis=-1)) + add_log = Lambda(lambda x: op.concatenate([x, op.op_log(x)], axis=-1)) # Evolution layer layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out) @@ -494,10 +492,10 @@ def pdfNN_layer_generator( # Normalization and sum rules if impose_sumrule: - sumrule_imposition, integrator_input = msr_constraints.msr_impose(mode=impose_sumrule) + sumrule_layer, integrator_input = msr_constraints.msr_impose(mode=impose_sumrule) model_input = [integrator_input, placeholder_input] else: - sumrule_imposition = lambda x: x + sumrule_layer = lambda x: x integrator_input = None model_input = [placeholder_input] @@ -550,7 +548,7 @@ def dense_me(x): # Apply preprocessing and basis def layer_fitbasis(x): - ret = operations.op_multiply([dense_me(x), layer_preproc(x)]) + ret = op.op_multiply([dense_me(x), layer_preproc(x)]) if basis_rotation.is_identity(): # if we don't need to rotate basis we don't want spurious layers return ret @@ -561,7 +559,7 @@ def layer_pdf(x): return layer_evln(layer_fitbasis(x)) # Final PDF - final_pdf = sumrule_imposition(layer_fitbasis, layer_pdf) + final_pdf = sumrule_layer(layer_fitbasis, layer_pdf) pdf_model = MetaModel(model_input, final_pdf(placeholder_input), name=f"PDF_{i}") diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index 7bd2cd969f..20c43804cc 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -5,7 +5,7 @@ import numpy as np from n3fit.layers import xDivide, MSR_Normalization, xIntegrator -from n3fit.backends import operations +from n3fit.backends import operations as op from n3fit.backends import MetaModel @@ -64,7 +64,7 @@ def msr_impose(nx=int(2e3), basis_size=8, mode='All'): normalizer = MSR_Normalization(input_shape=(basis_size,), mode=mode) # 5. Make the xgrid array into a backend input layer so it can be given to the normalization - xgrid_input = operations.numpy_to_input(xgrid) + xgrid_input = op.numpy_to_input(xgrid) # Now prepare a function that takes as input the 8-flavours output of the NN # and the 14-flavours after the fk rotation and returns a 14-flavours normalized output @@ -79,7 +79,7 @@ def apply_normalization(layer_fitbasis, layer_pdf): layer_fitbasis: output of the NN layer_pdf: output for the fktable """ - pdf_integrand = operations.op_multiply([division_by_x(xgrid_input), layer_fitbasis(xgrid_input)]) + pdf_integrand = op.op_multiply([division_by_x(xgrid_input), layer_fitbasis(xgrid_input)]) normalization = normalizer(integrator(pdf_integrand)) def ultimate_pdf(x): @@ -99,12 +99,12 @@ def check_integration(ultimate_pdf, integration_input): """ nx = int(1e4) xgrid, weights_array = gen_integration_input(nx) - xgrid_input = operations.numpy_to_input(xgrid) + xgrid_input = op.numpy_to_input(xgrid) multiplier = xDivide(output_dim=14, div_list=range(3, 9)) def pdf_integrand(x): - res = operations.op_multiply([multiplier(x), ultimate_pdf(x)]) + res = op.op_multiply([multiplier(x), ultimate_pdf(x)]) return res modelito = MetaModel([xgrid_input, integration_input], pdf_integrand(xgrid_input)) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 1f642a02db..dd912d997e 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -75,7 +75,7 @@ def initialize_seeds(replica: list, trvlseed: int, nnseed: int, mcseed: int, gen # Action to be called by valid phys # All information defining the NN should come here in the "parameters" dict -@n3fit.checks.can_run_in_parallel +@n3fit.checks.can_run_multiple_replicas @n3fit.checks.check_consistent_basis @n3fit.checks.wrapper_check_NN @n3fit.checks.wrapper_hyperopt diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index b423589ec8..2c3f509405 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -180,6 +180,3 @@ def test_weirdbasis(tmp_path, timing=30): # with pytest.raises(sp.TimeoutExpired): with pytest.raises(sp.CalledProcessError): sp.run(f"{EXE} {quickcard} {REPLICA}".split(), cwd=tmp_path, timeout=timing, check=True) - - -# test_performfit_and_timing(pathlib.Path("/tmp/random_path"))