diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst index f59137c1af..89cf5bc07f 100644 --- a/doc/sphinx/source/n3fit/runcard_detailed.rst +++ b/doc/sphinx/source/n3fit/runcard_detailed.rst @@ -9,8 +9,9 @@ In this section we fine-grain the explanation of the different parameters that e - :ref:`networkarch-label` - :ref:`optimizer-label` - :ref:`positivity-label` -- :ref:`otheroptions-label` - :ref:`tensorboard-label` +- :ref:`parallel-label` +- :ref:`otheroptions-label` .. _preprocessing-label: @@ -206,24 +207,6 @@ Threshold :math:`\chi2` - ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping. -Save and load weights of the model -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: yaml - - fitting: - save: "weights.h5" - load: "weights.h5" - -- ``save``: saves the weights of the PDF model in the selected file in the replica folder. -- ``load``: loads the weights of the PDF model from the selected file. - -Since the weights depend only on the architecture of the Neural Network, -it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments -and load it in a different runcard and continue the training from there. - -While the load file is read as an absolute path, the file to save to will be found -inside the replica folder. .. _tensorboard-label: @@ -258,3 +241,54 @@ Logging details can be visualized in the browser with the following command: Logging details will include the value of the loss for each experiment over time, the values of the weights of the NN, as well as a detailed analysis of the amount of time that TensorFlow spent on each operation. + + +.. _parallel-label: + +Running fits in parallel +------------------------ + +It is possible to run fits in parallel with ``n3fit`` by using the ``parallel_models`` +flag in the runcard (by default the number of ``parallel_models`` is set to 1). +Running in parallel can be quite hard on memory and it is only advantageous when +fitting on a GPU, where one can find a speed up equal to the number of models run +in parallel (each model being a different replica). + +At present it cannot be used together with the ``hyperopt`` module. + + +.. _otheroptions-label: + +Other options +------------- + +Threshold :math:`\chi2` +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + fitting: + parameters: + threshold_chi2: 4.0 + +- ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping. + + +Save and load weights of the model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + fitting: + save: "weights.h5" + load: "weights.h5" + +- ``save``: saves the weights of the PDF model in the selected file in the replica folder. +- ``load``: loads the weights of the PDF model from the selected file. + +Since the weights depend only on the architecture of the Neural Network, +it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments +and load it in a different runcard and continue the training from there. + +While the load file is read as an absolute path, the file to save to will be found +inside the replica folder. diff --git a/n3fit/runcards/Basic_runcard_parallel.yml b/n3fit/runcards/Basic_runcard_parallel.yml new file mode 100644 index 0000000000..52fdcae4a3 --- /dev/null +++ b/n3fit/runcards/Basic_runcard_parallel.yml @@ -0,0 +1,106 @@ +# +# Configuration file for n3fit +# + +############################################################ +description: Basic runcard + +############################################################ +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +dataset_inputs: +- { dataset: SLACP, frac: 0.5} +- { dataset: NMC, frac: 0.5 } +- { dataset: NMCPD, frac: 0.5 } +- { dataset: CMSJETS11, frac: 0.5, sys: 10 } + +############################################################ +datacuts: + t0pdfset : NNPDF31_nlo_as_0118 # PDF set to generate t0 covmat + q2min : 3.49 # Q2 minimum + w2min : 12.5 # W2 minimum + combocuts : NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev : 0 # jet pt cut for tevatron + jetptcut_lhc : 0 # jet pt cut for lhc + wptcut_lhc : 30.0 # Minimum pT for W pT diff distributions + jetycut_tev : 1e30 # jet rap. cut for tevatron + jetycut_lhc : 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut : 1e30 # jet cfact. cut + +############################################################ +theory: + theoryid: 53 # database id + +############################################################ +fitting: + trvlseed: 1 + nnseed: 2 + mcseed: 3 + + genrep: False # true = generate MC replicas, false = use real data + + parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [15, 10, 8] + activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] + initializer: 'glorot_normal' + optimizer: + optimizer_name: 'RMSprop' + learning_rate: 0.01 + clipnorm: 1.0 + epochs: 900 + positivity: + multiplier: 1.05 # When any of the multiplier and/or the initial is not set + initial: # the poslambda will be used instead to compute these values per dataset + threshold: 1e-5 + stopping_patience: 0.30 # percentage of the number of epochs + layer_type: 'dense' + dropout: 0.0 + threshold_chi2: 5.0 + + # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) + # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) + # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) + # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) + fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. + basis: + # remeber to change the name of PDF accordingly with fitbasis + # pos: True for NN squared + # mutsize: mutation size + # mutprob: mutation probability + # smallx, largex: preprocessing ranges + - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False } + - { fl: g, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False } + - { fl: v, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False } + - { fl: v3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.21,0.57], largex: [1.35,3.08] } + - { fl: v8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True } + - { fl: t3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.37,1.52], largex: [1.74,3.39] } + - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] } + - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] } + +############################################################ +positivity: + posdatasets: + - { dataset: POSF2U, poslambda: 1e6 } # Positivity Lagrange Multiplier + - { dataset: POSFLL, poslambda: 1e4 } + +############################################################ +integrability: + integdatasets: + - {dataset: INTEGXT3, poslambda: 1e2} + +############################################################ +lhagrid: + nx : 150 + xmin: 1e-9 + xmed: 0.1 + xmax: 1.0 + nq : 50 + qmax: 1e5 + +############################################################ +debug: True +maxcores: 8 +parallel_models: 2 diff --git a/n3fit/runcards/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/DIS_diagonal_l2reg_example.yml index f392199f07..7b56a2fdc9 100644 --- a/n3fit/runcards/DIS_diagonal_l2reg_example.yml +++ b/n3fit/runcards/DIS_diagonal_l2reg_example.yml @@ -76,7 +76,7 @@ fitting: optimizer: learning_rate: 1.0 optimizer_name: 'Adadelta' - epochs: 40000 + epochs: 4000 positivity: multiplier: 1.09 initial: 10.0 diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py index 1fa73109b8..b5401983e0 100644 --- a/n3fit/src/n3fit/ModelTrainer.py +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -9,6 +9,7 @@ between iterations while at the same time keeping the amount of redundant calls to a minimum """ import logging +from itertools import zip_longest import numpy as np import n3fit.model_gen as model_gen from n3fit.backends import MetaModel, clear_backend_state, operations, callbacks @@ -29,101 +30,12 @@ PUSH_INTEGRABILITY_EACH = 100 -def _assign_data_to_model(model, data_dict, fold_k=0): - """ - Reads the data dictionary (``data_dict``) and assings the target data to the model - It returns a dictionary containing: - { - 'model': the backend.MetaModel to be trained - 'target_ndata': an array of target output - 'ndata': the number of datapoints - 'losses': the list of loss functions of the model - } - - If kfolding is active applies the (``fold_k``) fold to the target data. - in this case ndata is a count of the non_zero entries of the fold - - Note: this function is transitional. Eventually a validphys action should - provide an experiment object with a .target_data(fold_indx) method which - should return the necessary information: - - number of datapoints - - target data with the right entries set to 0* - *or masked away if it is able to also return a list of loss functions that will mask away - the corresponding entries of the prediction - - - Parameters - ---------- - model: backend.MetaModel - model to be added to the dictionary - data_dict: dict - dictionary containing: { - 'expdata' : list of experimental data which the model will target, - 'folds' : a list (size=expdata) of lists (size=kfolds) with the folding masks) - 'losses': a list of loss functions for the model - } - fold_k: int - when kfolding, index of the fold, so that for every experiment we apply the - folds[index_experiment][mask] - - Returns - ------- - ret: dict - dictionary containing the model and its associated ndata and loss - """ - # Array with all data - all_data = data_dict["expdata"] - # Each element of this list correspond to the set of folds for one experiment - all_folds = data_dict["folds"] - n_exps = len(all_folds) - # Now set to 0 the data folded away - active_data = [] - ndata = 0 - for exp_data, exp_fold in zip(all_data, all_folds): - if exp_fold: - mask = exp_fold[fold_k] - active_data.append(exp_data * mask) - ndata += np.count_nonzero(mask) - else: - active_data.append(exp_data) - ndata += exp_data.size - # There might be special outputs (like positivitiy) that is not - # affected by the folding. - # They don't count for the chi2 (which is only for reporting) - active_data += all_data[n_exps:] - ret = { - "model": model, - "target_data": active_data, - "ndata": ndata, - "losses": data_dict["losses"], - } - return ret - - -def _model_compilation(models, optimizer_params): - """ - Compiles all models - - Parameters - ---------- - models: list(dict) - A ditionary defining the model - optimizer_params: dict - Optimizer parameters to be passes to the compile method of the model - """ - for _, model_dict in models.items(): - model = model_dict["model"] - target = model_dict["target_data"] - losses = model_dict["losses"] - model.compile(loss=losses, target_output=target, **optimizer_params) - - -def _pdf_injection(pdf_layers, observables, datasets_out=None): +def _pdf_injection(pdf_layers, observables, masks): """ Takes as input a list of output layers and returns a corresponding list where all output layers call the pdf layer at self.pdf_layer """ - return [f(x, datasets_out=datasets_out) for f, x in zip(observables, pdf_layers)] + return [f(x, mask=m) for f, x, m in zip_longest(observables, pdf_layers, masks)] def _LM_initial_and_multiplier(input_initial, input_multiplier, max_lambda, steps): @@ -174,27 +86,43 @@ def __init__( pass_status="ok", failed_status="fail", debug=False, - save_weights_each=False, kfold_parameters=None, max_cores=None, - model_file=None, + model_file=None, sum_rules=True, + parallel_models=1 ): """ Parameters ---------- - exp_info: list of dictionaries containing experiments - pos_info: list of dictionaries containing positivity sets - integ_info: list of dictionaries containing integrability sets - flavinfo: the object returned by fitting['basis'] - nnseed: the seed used to initialise the Neural Network, will be passed to model_gen - pass_status: flag to signal a good run - failed_status: flag to signal a bad run - debug: flag to activate some debug options - save_weights_each: if set, save the state of the fit - every ``save_weights_each`` epochs - model_file: str whether to save the models - sum_rules: str whether sum rules should be enabled (All, MSR, VSR, False) + exp_info: list + list of dictionaries containing experiments + pos_info: list + list of dictionaries containing positivity sets + integ_info: list + list of dictionaries containing integrability sets + flavinfo: list + the object returned by fitting['basis'] + fitbasis: str + the name of the basis being fitted + nnseed: int + the seed used to initialise the Neural Network, will be passed to model_gen + pass_status: str + flag to signal a good run + failed_status: str + flag to signal a bad run + debug: bool + flag to activate some debug options + kfold_parameters: dict + parameters defining the kfolding method + max_cores: int + maximum number of cores the fitting can use to run + model_file: str + whether to save the models + sum_rules: str + whether sum rules should be enabled (All, MSR, VSR, False) + parallel_models: int + number of models to fit in parallel """ # Save all input information @@ -211,8 +139,8 @@ def __init__( self.pass_status = pass_status self.failed_status = failed_status self.debug = debug - self.save_weights_each = save_weights_each self.all_datasets = [] + self.parallel_models = parallel_models # Initialise internal variables which define behaviour if debug: @@ -251,7 +179,6 @@ def __init__( self.training = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "posdatasets": [], @@ -265,7 +192,6 @@ def __init__( self.validation = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "folds": [], @@ -274,13 +200,10 @@ def __init__( self.experimental = { "output": [], "expdata": [], - "losses": [], "ndata": 0, "model": None, "folds": [], } - self.model_dicts = None - self._fill_the_dictionaries() if self.validation["ndata"] == 0: @@ -359,33 +282,37 @@ def _fill_the_dictionaries(self): self.training["expdata"].append(integ_dict["expdata"]) self.training["integdatasets"].append(integ_dict["name"]) - def _model_generation(self, pdf_model, partition): + def _model_generation(self, pdf_models, partition, partition_idx): """ Fills the three dictionaries (``training``, ``validation``, ``experimental``) - with the ``model`` entry - + with the ``model`` entry. Compiles the validation and experimental models with fakes optimizers and learning rate as they are never trained, but this is needed by some backends - in order to run evaluate on them + in order to run evaluate on them. Before entering this function the dictionaries contain a list of inputs and a list of outputs, but they are not connected. This function connects inputs with outputs by injecting the PDF. At this point we have a PDF model that takes an input (1, None, 1) - and outputs in return (1, none, 14) + and outputs in return (1, none, 14). The injection of the PDF is done by concatenating all inputs and calling pdf_model on it. This in turn generates an output_layer that needs to be splitted for every experiment as we have a set of observable "functions" that each take (1, exp_xgrid_size, 14) and output (1, masked_ndata) where masked_ndata can be the training/validation - or the experimental mask (in which cased masked_ndata == ndata) + or the experimental mask (in which cased masked_ndata == ndata). + + Several models can be fitted at once by passing a list of models with a shared input + this function will give the same input to every model and will concatenate the output at the end + so that the final output of the model is (1, None, 14, n) (with n=number of parallel models) + Parameters ---------- - pdf_model: MetaModel - model producing pdf values + pdf_models: list(n3fit.backend.MetaModel) + a list of models that produce PDF values Returns ------- @@ -398,13 +325,23 @@ def _model_generation(self, pdf_model, partition): input_arr = np.concatenate(self.input_list, axis=1) input_layer = operations.numpy_to_input(input_arr.T) - # The input to the full model is expected to be the input to the PDF - # by reutilizing `pdf_model.parse_input` we ensure any auxiliary input is also accunted fro - full_model_input_dict = pdf_model._parse_input([input_layer], pass_content=False) + # The trainable part of the model is a concatenation of all PDF models + # where each model corresponds to a different replica + all_replicas_pdf = [] + + for pdf_model in pdf_models: + # The input to the full model is expected to be the input to the PDF + # by reutilizing `pdf_model.parse_input` we ensure any auxiliary input is also accunted fro + full_model_input_dict = pdf_model._parse_input([input_layer], pass_content=False) + + # The output of the pdf on input_layer will be thus a concatenation + # of the PDF values for all experiments + full_pdf = pdf_model.apply_as_layer([input_layer]) + + all_replicas_pdf.append(full_pdf) + + full_pdf_per_replica = operations.stack(all_replicas_pdf, axis=-1) - # The output of the pdf on input_layer will be thus a concatenation - # of the PDF values for all experiments - full_pdf = pdf_model.apply_as_layer([input_layer]) # The input layer is a concatenation of all experiments # we need now to split the output on a different array per experiment sp_ar = [self.input_sizes] @@ -412,25 +349,32 @@ def _model_generation(self, pdf_model, partition): splitting_layer = operations.as_layer( operations.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split" ) - splitted_pdf = splitting_layer(full_pdf) + splitted_pdf = splitting_layer(full_pdf_per_replica) # If we are in a kfolding partition, select which datasets are out if partition: - kfold_datasets = partition["datasets"] - negate_k_datasets = [d for d in self.all_datasets if d not in kfold_datasets] + training_mask = [i[partition_idx] for i in self.training["folds"]] + validation_mask = [i[partition_idx] for i in self.validation["folds"]] + experimental_mask = [i[partition_idx] for i in self.experimental["folds"]] else: - kfold_datasets = None - negate_k_datasets = None + training_mask = validation_mask = experimental_mask = [None] # Training and validation leave out the kofld dataset # experiment leaves out the negation - output_tr = _pdf_injection(splitted_pdf, self.training["output"], kfold_datasets) + output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask) training = MetaModel(full_model_input_dict, output_tr) - output_vl = _pdf_injection(splitted_pdf, self.validation["output"], kfold_datasets) + # For validation we don't have integrability + n_val = len(self.validation["output"]) + output_vl = _pdf_injection(splitted_pdf[:n_val], self.validation["output"], validation_mask) validation = MetaModel(full_model_input_dict, output_vl) - output_ex = _pdf_injection(splitted_pdf, self.experimental["output"], negate_k_datasets) + # And for experimental we don't have positivity + # TODO: risky to rely that much in the order + n_exps = len(self.experimental["output"]) + output_ex = _pdf_injection( + splitted_pdf[:n_exps], self.experimental["output"], experimental_mask + ) experimental = MetaModel(full_model_input_dict, output_ex) @@ -455,7 +399,7 @@ def _reset_observables(self): """ self.input_list = [] self.input_sizes = [] - for key in ["output", "losses", "posmultipliers", "integmultipliers"]: + for key in ["output", "posmultipliers", "integmultipliers"]: self.training[key] = [] self.validation[key] = [] self.experimental[key] = [] @@ -511,10 +455,6 @@ def _generate_observables( self.validation["output"].append(exp_layer["output_vl"]) self.experimental["output"].append(exp_layer["output"]) - self.training["losses"].append(exp_layer["loss_tr"]) - self.validation["losses"].append(exp_layer["loss_vl"]) - self.experimental["losses"].append(exp_layer["loss"]) - # Generate the positivity penalty for pos_dict in self.pos_info: if not self.mode_hyperopt: @@ -534,9 +474,7 @@ def _generate_observables( # The positivity should be on both training and validation models self.training["output"].append(pos_layer["output_tr"]) - self.training["losses"].append(pos_layer["loss_tr"]) self.validation["output"].append(pos_layer["output_tr"]) - self.validation["losses"].append(pos_layer["loss_tr"]) self.training["posmultipliers"].append(pos_multiplier) self.training["posinitials"].append(pos_initial) @@ -563,7 +501,6 @@ def _generate_observables( # The integrability all falls to the training self.training["output"].append(integ_layer["output_tr"]) - self.training["losses"].append(integ_layer["loss_tr"]) self.training["integmultipliers"].append(integ_multiplier) self.training["integinitials"].append(integ_initial) @@ -610,11 +547,11 @@ def _generate_pdf( pdf_model: MetaModel pdf model """ - log.info("Generating PDF model") + log.info("Generating PDF models") # Set the parameters of the NN # Generate the NN layers - pdf_model = model_gen.pdfNN_layer_generator( + pdf_models = model_gen.pdfNN_layer_generator( nodes=nodes_per_layer, activations=activation_per_layer, layer_type=layer_type, @@ -626,27 +563,9 @@ def _generate_pdf( regularizer=regularizer, regularizer_args=regularizer_args, impose_sumrule=self.impose_sumrule, + parallel_models=self.parallel_models, ) - return pdf_model - - def _assign_data(self, models, fold_k=0): - """Assign to each model the data to compare with as well as the - number of data points in the model. - In the most general case training and validation get assigned the replic'd data - while experimental gets the actual data. - In the kfolding case (i.e, partition != None), they all receive the same data - but the folded data is set to 0 for training and validation - """ - training = _assign_data_to_model(models["training"], self.training, fold_k) - validation = _assign_data_to_model(models["validation"], self.validation, fold_k) - experimental = _assign_data_to_model(models["experimental"], self.experimental, fold_k) - - ret = { - "training": training, - "validation": validation, - "experimental": experimental, - } - return ret + return pdf_models def _prepare_reporting(self, partition): """Parses the information received by the :py:class:`n3fit.ModelTrainer.ModelTrainer` @@ -697,10 +616,11 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100): callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) - if stopping_object.positivity_pass(): + # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means + # for now consider the run as good if any replica passed + if any([bool(i) for i in stopping_object.e_best_chi2]): return self.pass_status - else: - return self.failed_status + return self.failed_status def _hyperopt_override(self, params): """ Unrolls complicated hyperopt structures into very simple dictionaries""" @@ -711,8 +631,8 @@ def _hyperopt_override(self, params): for key, value in item.items(): params[key] = value - def enable_tensorboard(self, logdir, weight_freq = 0, profiling=False): - """ Enables tensorboard callback for further runs of the fitting procedure + def enable_tensorboard(self, logdir, weight_freq=0, profiling=False): + """Enables tensorboard callback for further runs of the fitting procedure Parameters ---------- @@ -723,7 +643,9 @@ def enable_tensorboard(self, logdir, weight_freq = 0, profiling=False): profiling: bool flag to enable the tensorboard profiler """ - callback_tb = callbacks.gen_tensorboard_callback(logdir, profiling=profiling, histogram_freq=weight_freq) + callback_tb = callbacks.gen_tensorboard_callback( + logdir, profiling=profiling, histogram_freq=weight_freq + ) self.callbacks.append(callback_tb) def evaluate(self, stopping_object): @@ -741,15 +663,13 @@ def evaluate(self, stopping_object): val_chi2 : chi2 of the validation set exp_chi2: chi2 of the experimental data (without replica or tr/vl split) """ - if self.model_dicts is None: + if self.training["model"] is None: raise RuntimeError("Modeltrainer.evaluate was called before any training") # Needs to receive a `stopping_object` in order to select the part of the # training and the validation which are actually `chi2` and not part of the penalty - training = self.model_dicts["training"] - experimental = self.model_dicts["experimental"] - train_chi2 = stopping_object.evaluate_training(training["model"]) - val_chi2, _ = stopping_object.validation.loss() - exp_chi2 = experimental["model"].compute_losses()["loss"] / experimental["ndata"] + train_chi2 = stopping_object.evaluate_training(self.training["model"]) + val_chi2 = stopping_object.vl_chi2 + exp_chi2 = self.experimental["model"].compute_losses()["loss"] / self.experimental["ndata"] return train_chi2, val_chi2, exp_chi2 def hyperparametrizable(self, params): @@ -800,7 +720,6 @@ def hyperparametrizable(self, params): threshold_chi2 = params.get("threshold_chi2", CHI2_THRESHOLD) # Initialize the chi2 dictionaries - l_train = [] l_valid = [] l_exper = [] l_hyper = [] @@ -814,7 +733,7 @@ def hyperparametrizable(self, params): seed = np.random.randint(0, pow(2, 31)) # Generate the pdf model - pdf_model = self._generate_pdf( + pdf_models = self._generate_pdf( params["nodes_per_layer"], params["activation_per_layer"], params["initializer"], @@ -827,12 +746,14 @@ def hyperparametrizable(self, params): # Model generation joins all the different observable layers # together with pdf model generated above - models = self._model_generation(pdf_model, partition) + models = self._model_generation(pdf_models, partition, k) # Only after model generation, apply possible weight file + # TODO: not sure whether it is a good idea that all of them start at the same point if self.model_file: log.info("Applying model file %s", self.model_file) - pdf_model.load_weights(self.model_file) + for pdf_model in pdf_models: + pdf_model.load_weights(self.model_file) if k > 0: # Reset the positivity and integrability multipliers @@ -840,17 +761,12 @@ def hyperparametrizable(self, params): initial_values = self.training["posinitials"] + self.training["posinitials"] models["training"].reset_layer_weights_to(pos_and_int, initial_values) - # Assign data to each model - # model dicts is similar to model but includes information about - # the target data and number of points - model_dicts = self._assign_data(models, k) - # Generate the list containing reporting info necessary for chi2 reporting = self._prepare_reporting(partition) if self.no_validation: # Substitute the validation model with the training model - model_dicts["validation"] = model_dicts["training"] + models["validation"] = models["training"] validation_model = models["training"] else: validation_model = models["validation"] @@ -860,16 +776,16 @@ def hyperparametrizable(self, params): stopping_object = Stopping( validation_model, reporting, - pdf_model, + pdf_models, total_epochs=epochs, stopping_patience=stopping_epochs, - save_weights_each=self.save_weights_each, threshold_positivity=threshold_pos, - threshold_chi2=threshold_chi2 + threshold_chi2=threshold_chi2, ) # Compile each of the models with the right parameters - _model_compilation(model_dicts, params["optimizer"]) + for model in models.values(): + model.compile(**params["optimizer"]) passed = self._train_and_fit( models["training"], @@ -877,66 +793,65 @@ def hyperparametrizable(self, params): epochs=epochs, ) - # Save validation and training chi2 - training_loss = stopping_object.tr_chi2 - validation_loss = stopping_object.vl_chi2 + if self.mode_hyperopt: + # TODO: currently only working for one single replica + # If doing a hyperparameter scan we need to keep track at this point of the loss function + validation_loss = stopping_object.vl_chi2 - # Compute experimental loss - exp_loss_raw = models["experimental"].compute_losses()["loss"] - experimental_loss = exp_loss_raw / model_dicts["experimental"]["ndata"] + # Compute experimental loss + exp_loss_raw = np.take(models["experimental"].compute_losses()["loss"], -1) + # And divide by the number of active points in this fold + # it would be nice to have a ndata_per_fold variable coming in the vp object... + ndata = np.sum([np.count_nonzero(i[k]) for i in self.experimental["folds"]]) + experimental_loss = exp_loss_raw / ndata - if self.mode_hyperopt: hyper_loss = experimental_loss if passed != self.pass_status: log.info("Hyperparameter combination fail to find a good fit, breaking") # If the fit failed to fit, no need to add a penalty to the loss break for penalty in self.hyper_penalties: - hyper_loss += penalty(pdf_model, stopping_object) + hyper_loss += penalty(pdf_models[0], stopping_object) l_hyper.append(hyper_loss) - log.info("Fold %d finished, loss=%.1f, pass=%s", k+1, hyper_loss, passed) + log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) if hyper_loss > self.hyper_threshold: - log.info("Loss above threshold (%.1f > %.1f), breaking", hyper_loss, self.hyper_threshold) + log.info( + "Loss above threshold (%.1f > %.1f), breaking", + hyper_loss, + self.hyper_threshold, + ) # Apply a penalty proportional to the number of folds that have not been computed pen_mul = len(self.kpartitions) - k l_hyper = [i * pen_mul for i in l_hyper] break - # Save all losses - l_train.append(training_loss) - l_valid.append(validation_loss) - l_exper.append(experimental_loss) - - dict_out = { - "status": passed, - "training_loss": np.average(l_train), - "validation_loss": np.average(l_valid), - "experimental_loss": np.average(l_exper), - } + # Save all losses + l_valid.append(validation_loss) + l_exper.append(experimental_loss) if self.mode_hyperopt: - dict_out["loss"] = self.hyper_loss(l_hyper) - dict_out["kfold_meta"] = { - "training_losses": l_train, - "validation_losses": l_valid, - "experimental_losses": l_exper, - "hyper_losses": l_hyper, + # Hyperopt needs a dictionary with information about the losses + # it is possible to store arbitrary information in the trial file by adding it to this dictionary + dict_out = { + "status": passed, + "loss": self.hyper_loss(l_hyper), + "validation_loss": np.average(l_valid), + "experimental_loss": np.average(l_exper), + "kfold_meta": { + "validation_losses": l_valid, + "experimental_losses": l_exper, + "hyper_losses": l_hyper, + }, } - # If we are using hyperopt we don't need to output any other information return dict_out - dict_out["loss"] = experimental_loss - - # Add to the output dictionary things that are needed by performfit.py - # to generate the output pdf, check the arc-length, gather stats, etc - # some of them are already attributes of the class so they are redundant here - # but I think it's good to present them explicitly - dict_out["stopping_object"] = stopping_object - dict_out["experimental"] = self.experimental - dict_out["training"] = self.training - dict_out["pdf_model"] = pdf_model - - # Only after the training has finished, we save all models for future reporting - self.model_dicts = model_dicts + # Keep a reference to the models after training for future reporting + self.training["model"] = models["training"] + self.experimental["model"] = models["experimental"] + self.validation["model"] = models["validation"] + # In a normal run, the only information we need to output is the stopping object + # (which contains metadata about the stopping) + # and the pdf models (which are used to generate the PDF grids and compute arclengths) + dict_out = {"status": passed, "stopping_object": stopping_object, "pdf_models": pdf_models} return dict_out diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py index 3370ae75a8..726a752eda 100644 --- a/n3fit/src/n3fit/backends/__init__.py +++ b/n3fit/src/n3fit/backends/__init__.py @@ -12,7 +12,6 @@ regularizer_selector, Concatenate, ) -from n3fit.backends.keras_backend import losses from n3fit.backends.keras_backend import operations from n3fit.backends.keras_backend import constraints from n3fit.backends.keras_backend import callbacks diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index fe17eaec99..3bc7862cd1 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -4,19 +4,13 @@ Extension of the backend Model class containing some wrappers in order to absorb other backend-dependent calls. """ - +import re import numpy as np import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras import optimizers as Kopt -from n3fit.backends.keras_backend.operations import numpy_to_tensor - -# Check the TF version to check if legacy-mode is needed (TF < 2.2) -tf_version = tf.__version__.split('.') -if int(tf_version[0]) == 2 and int(tf_version[1]) < 2: - LEGACY = True -else: - LEGACY = False +from tensorflow.python.keras.utils import tf_utils +from n3fit.backends.keras_backend import operations as op # Define in this dictionary new optimizers as well as the arguments they accept # (with default values if needed be) @@ -35,6 +29,12 @@ v[1]["clipnorm"] = 1.0 +def _default_loss(y_true, y_pred): + """ Default loss to be used when the model is compiled with loss = Null + (for instance if the prediction of the model is already the loss """ + return op.sum(y_pred) + + def _fill_placeholders(original_input, new_input=None): """ Fills the placeholders of the original input with a new set of input @@ -113,7 +113,7 @@ def __init__(self, input_tensors, output_tensors, **kwargs): # otherwise, put a placeholder None as it will come from the outside name = input_tensor.name.rsplit(":",1)[0] try: - self.x_in[name] = numpy_to_tensor(input_tensor.tensor_content) + self.x_in[name] = op.numpy_to_tensor(input_tensor.tensor_content) self.tensors_in[name] = input_tensor except AttributeError: self.x_in[name] = None @@ -122,7 +122,7 @@ def __init__(self, input_tensors, output_tensors, **kwargs): self.all_inputs = input_list self.all_outputs = output_list self.target_tensors = None - self.eval_fun = None + self.compute_losses_function = None def _parse_input(self, extra_input=None, pass_content=True): """ Returns the input tensors the model was compiled with. @@ -157,7 +157,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs): x = self._parse_input(self.x_in) if y is None: y = self.target_tensors - history = super().fit(x=x, y=y, epochs=epochs, **kwargs,) + history = super().fit(x=x, y=y, epochs=epochs, **kwargs) loss_dict = history.history return loss_dict @@ -169,45 +169,51 @@ def predict(self, x=None, **kwargs): def compute_losses(self): """ - This function is the fast-equivalent to the model ``evaluate(x,y)`` method. + This function is equivalent to the model ``evaluate(x,y)`` method of most TensorFlow models + which return a dictionary of losses per output layer. + The losses reported in the ``evaluate`` method for n3fit are, however, summed over replicas. + Instead the loss we are interested in is usually the output of the model (i.e., predict) - On first call it calls ``.evaluate(return_dict=True, verbose=0)`` to force - the initialization of the test function. - Subsequent calls of this method will (when applicable) - directly call the internal evaluation function ``eval_fun``. - This bypasses the pre- and post- evaluation steps, resulting in a ~10% speed up - with respect to ``.evaluate(...)`` + This function then generates a dict of partial losses of the model separated per replica. + i.e., the output for experiment {'LHC_exp'} will be an array of Nrep elements. Returns ------- dict a dictionary with all partial losses of the model """ - if self.eval_fun is None: - # We still need to perform some initialization - if LEGACY: - # For TF < 2.2 we need to generate the test_function ourselves - self.make_test_function() - else: - return self.evaluate(return_dict=True, verbose=False) - if LEGACY: - # For tF < 2.2 we need to force the output to be a float - ret = self.eval_fun() - ret['loss'] = ret['loss'].numpy() - return ret - else: - return self.eval_fun() + # TODO might not work for TF < 2.2 + if self.compute_losses_function is None: + # If it is the first time we are passing through, compile the function and save it + out_names = [f"{i}_loss" for i in self.output_names] + out_names.insert(0, "loss") + + # Compile a evaluation function + @tf.function + def losses_fun(): + predictions = self(self._parse_input(None)) + # If we only have one dataset the output changes + if len(out_names) == 2: + predictions = [predictions] + total_loss = tf.reduce_sum(predictions, axis=0) + ret = [total_loss] + predictions + return dict(zip(out_names, ret)) + + self.compute_losses_function = losses_fun + + ret = self.compute_losses_function() + + # The output of this function is to be used by python (and numpy) so we need to convert + # the tensorflow variable to python primitives or numpy arrays. + # Undocumented TF function that converts all the tensors from the ret dictionary to numpy arrays + # if it dissapears, equivalent for us to {k: i.numpy() for k, i in ret.items()} + try: + dict_result = tf_utils.to_numpy_or_python_type(ret) + except AttributeError: + # For TF < 2.2 + dict_result = {k: i.numpy() for k, i in ret.items()} + return dict_result - def evaluate(self, x=None, y=None, **kwargs): - """ - Wrapper around evaluate to take into account the case in which the data is already known - when the model is compiled. - """ - x = self._parse_input(self.x_in) - if LEGACY and y is None: - y = self.target_tensors - result = super().evaluate(x=x, y=y, **kwargs) - return result def compile( self, @@ -250,6 +256,9 @@ def compile( f"[MetaModel.select_initializer] optimizer not implemented: {optimizer_name}" ) from e + if loss is None: + loss = _default_loss + opt_function = opt_tuple[0] opt_args = opt_tuple[1] @@ -263,60 +272,16 @@ def compile( # Instantiate the optimizer opt = opt_function(**opt_args) - # If given target output, compile it together with the model for better performance - if target_output is not None: + # If given target output is None, target_output is unnecesary, save just a zero per output + if target_output is None: + self.target_tensors = [np.zeros((1,1)) for i in self.output_shape] + else: if not isinstance(target_output, list): target_output = [target_output] - # Tensorize self.target_tensors = target_output - # Reset the evaluation function (if any) - self.eval_fun = None - super(MetaModel, self).compile(optimizer=opt, loss=loss) - def make_test_function(self): - """ If the model has been compiled with target data, it creates - a specific evaluate function with the target data already evaluated. - Otherwise return the normal tensorflow behaviour. - """ - if self.eval_fun is not None: - return self.eval_fun - - if self.target_tensors is None: - return super().make_test_function() - - # Recover the target tensors and their lengths, we cannot rely - # directly on the output from the model as we might have target_tensors - # with 0 data points (if the tr/vl mask covers the whole set) - lens = [] - tt = [] - for target in self.target_tensors: - lens.append(target.size) - tt.append(numpy_to_tensor(target)) - # Save target_tensors as tensors, as it might be useful for LEGACY - self.target_tensors = tt - - # Get the name of the output layer - # and add the suffix _loss to match TF behaviour - out_names = [f"{i}_loss" for i in self.output_names] - out_names.insert(0, "loss") - - @tf.function - def eval_fun(*args): - predictions = self(self._parse_input(None)) - # Concatenate the output to split them again as a list - ypred = tf.concat(predictions, axis=-1) - predspl = tf.split(ypred, lens, axis=-1) - loss_list = [lfun(target, pred) for target, pred, lfun in zip(tt, predspl, self.loss)] - ret = [tf.reduce_sum(loss_list)] + loss_list - return dict(zip(out_names, ret)) - - # Save the function so we don't go through this again - self.eval_fun = eval_fun - - return eval_fun - def set_masks_to(self, names, val=0.0): """ Set all mask value to the selected value Masks in MetaModel should be named {name}_mask @@ -369,3 +334,8 @@ def apply_as_layer(self, x): # TF 2.0 seems to fail with ValueError when passing a dictionary as an input y = x.values() return super().__call__(y) + + def get_layer_re(self, regex): + """ Get all layers matching the given regular expression """ + check = lambda x: re.match(regex, x.name) + return list(filter(check, self.layers)) diff --git a/n3fit/src/n3fit/backends/keras_backend/losses.py b/n3fit/src/n3fit/backends/keras_backend/losses.py deleted file mode 100644 index c4b427dd68..0000000000 --- a/n3fit/src/n3fit/backends/keras_backend/losses.py +++ /dev/null @@ -1,74 +0,0 @@ -""" - Module containing a list of loss functions availables to the fitting code -""" - -import tensorflow as tf -from tensorflow.keras import backend as K - - -def l_invcovmat(invcovmat_np): - """ - Returns a loss function such that: - L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} - """ - invcovmat = K.constant(invcovmat_np) - - def true_loss(y_true, y_pred): - # (yt - yp) * covmat * (yt - yp) - tmp = y_true - y_pred - right_dot = tf.tensordot(invcovmat, K.transpose(tmp), axes=1) - res = tf.tensordot(tmp, right_dot, axes=1) - return tf.reshape(res, (-1,)) - - return true_loss - - -def l_positivity(alpha=1e-7): - """ - Returns L = elu(y_pred) (considers y_true as 0) - - The positivity loss is computed by inverting the sign of the - datapoints and then applying the elu function, this function is - f(x) = x if x > 0 - f(x) = alpha * (e^{x} - 1) if x < 0 - This is done to avoid a big discontinuity in the derivative at 0 when - the lagrange multiplier is very big. - In practice this function can produce results in the range (-alpha, inf) - """ - - def true_loss(y_true, y_pred): - y = -y_pred - loss = K.elu(y, alpha=alpha) - res = K.sum(loss) - return tf.reshape(res, (-1,)) - - return true_loss - - -def l_integrability(): - """ - Returns (y_pred)*(y_pred) - """ - - def true_loss(y_true, y_pred): - loss = K.square(y_pred) - res = K.sum(loss, keepdims=True) - return tf.reshape(res, (-1,)) - - return true_loss - - -def l_diaginvcovmat(diaginvcovmat_np): - """ - Returns a loss function such that: - L = sum_{i} (yt - yp)_{i} invcovmat_{ii} (yt - yp)_{i} - diaginvcovmat_np should be 1d - """ - invcovmat = K.constant(diaginvcovmat_np) - - def true_loss(y_true, y_pred): - tmp = y_true - y_pred - res = tf.tensordot(invcovmat, K.transpose(tmp * tmp), axes=1) - return tf.reshape(res, (-1,)) - - return true_loss diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 5bad7ef570..3e2308e19c 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -234,6 +234,13 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None): return concatenated_tensor +def stack(tensor_list, axis=0, **kwargs): + """ Stack a list of tensors + see full `docs `_ + """ + return tf.stack(tensor_list, axis=axis, **kwargs) + + # Mathematical operations def pdf_masked_convolution(raw_pdf, basis_mask): """ Computes a masked convolution of two equal pdfs @@ -243,7 +250,7 @@ def pdf_masked_convolution(raw_pdf, basis_mask): Parameters ---------- pdf: tf.tensor - rank 3 (batchsize, xgrid, flavours) + rank 4 (batchsize, xgrid, flavours, replicas) basis_mask: tf.tensor rank 2 tensor (flavours, flavours) mask to apply to the pdf convolution @@ -254,11 +261,8 @@ def pdf_masked_convolution(raw_pdf, basis_mask): rank3 (len(mask_true), xgrid, xgrid) """ pdf = tf.squeeze(raw_pdf, axis=0) # remove the batchsize - luminosity = tensor_product(pdf, pdf, axes=0) - # (xgrid, flavour, xgrid, flavour) - # reshape to put the flavour indices at the beginning to apply mask - lumi_tmp = K.permute_dimensions(luminosity, (3, 1, 2, 0)) - pdf_x_pdf = boolean_mask(lumi_tmp, basis_mask) + luminosity = tf.einsum('air,bjr->jibar', pdf, pdf) + pdf_x_pdf = boolean_mask(luminosity, basis_mask) return pdf_x_pdf @@ -270,6 +274,13 @@ def tensor_product(*args, **kwargs): return tf.tensordot(*args, **kwargs) +def einsum(equation, *args, **kwargs): + """ + Computes the tensor product using einsum + See full `docs `_ + """ + return tf.einsum(equation, *args, **kwargs) + @tf.function(experimental_relax_shapes=True) def op_log(o_tensor, **kwargs): @@ -295,6 +306,7 @@ def split(*args, **kwargs): """ return tf.split(*args, **kwargs) + def scatter_to_one(values, indices=[[1]], output_dim=14): """ Like scatter_nd initialized to one instead of zero @@ -302,3 +314,13 @@ def scatter_to_one(values, indices=[[1]], output_dim=14): """ ones = np.ones(output_dim, dtype=np.float32) return tf.tensor_scatter_nd_update(ones, indices, values) + + +@tf.function +def backend_function(fun_name, *args, **kwargs): + """ + Wrapper to call non-explicitly implemented backend functions by name: (``fun_name``) + see full `docs `_ for some possibilities + """ + fun = getattr(K, fun_name) + return fun(*args, **kwargs) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index b96c2d0117..8c99e631c7 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -352,3 +352,23 @@ def check_consistent_basis(fitting, theoryid): raise CheckError(f"{theoryid} (intrinsic charm) is incompatible with basis {fitbasis}") if not theoryid.get_description()["IC"] and has_c: raise CheckError(f"{theoryid} (perturbative charm) is incompatible with basis {fitbasis}") + + +@make_argcheck +def can_run_multiple_replicas(fitting, replica, hyperopt, parallel_models=1): + """ Checks whether a runcard which is trying to run several replicas at once (parallel_models =/= 1) is valid + """ + rp = len(replica) + genrep = fitting.get("genrep") + if rp > 1 and not genrep: + raise CheckError("Can't run more than one replica at once if no replicas are to be generated") + if parallel_models == 1: + return + if hyperopt: + raise CheckError("Running replicas in parallel with hyperopt is still not supported") + if genrep: + raise CheckError("Replica generation is not supported yet for parallel models") + if fitting["parameters"].get("layer_type") != "dense": + raise CheckError("Parallelization has only been tested with layer_type=='dense'") + if rp > 1: + raise CheckError("Parallel mode cannot be used together with multireplica runs") diff --git a/n3fit/src/n3fit/hyper_optimization/penalties.py b/n3fit/src/n3fit/hyper_optimization/penalties.py index 7d3ef24708..b6bad1c8f3 100644 --- a/n3fit/src/n3fit/hyper_optimization/penalties.py +++ b/n3fit/src/n3fit/hyper_optimization/penalties.py @@ -83,11 +83,11 @@ def patience(pdf_model, stopping_object, alpha=1e-4): 3.434143467595683 """ - epoch_best = stopping_object.e_best_chi2 + epoch_best = np.take(stopping_object.e_best_chi2, 0) patience = stopping_object.stopping_patience max_epochs = stopping_object.total_epochs diff = abs(max_epochs - patience - epoch_best) - vl_loss = stopping_object.vl_chi2 + vl_loss = np.take(stopping_object.vl_chi2, 0) return vl_loss * np.exp(alpha * diff) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 7ae1fb24a0..48dabd5c2a 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -69,6 +69,11 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): # Check the directory exist, if it doesn't, generate it os.makedirs(replica_path_set, exist_ok=True) + stop_epoch = self.stopping_object.stop_epoch + + # Get the replica status for this object + replica_status = self.stopping_object.get_next_replica() + # export PDF grid to file storefit( self.pdf_object, @@ -76,7 +81,7 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): replica_path_set, fitname, self.q2, - self.stopping_object.e_best_chi2, + replica_status.best_epoch, vl_chi2, tr_chi2, true_chi2, @@ -84,26 +89,33 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): integrability_numbers, allchi2_lines, preproc_lines, - self.stopping_object.positivity_status(), + replica_status.positivity_status, self.timings, ) - # TODO: compute the chi2s directly from the stopping object # export all metadata from the fit to a single yaml file output_file = f"{replica_path_set}/{fitname}.json" json_dict = jsonfit( - self.stopping_object, self.pdf_object, tr_chi2, vl_chi2, true_chi2, self.timings + replica_status, self.pdf_object, tr_chi2, vl_chi2, true_chi2, stop_epoch, self.timings ) with open(output_file, "w") as fs: - json.dump(json_dict, fs, indent=2) + json.dump(json_dict, fs, indent=2, cls = SuperEncoder) + + +class SuperEncoder(json.JSONEncoder): + """ Custom json encoder to get around the fact that np.float32 =/= float """ + def default(self, o): + if isinstance(o, np.float32): + return float(o) + return super().default(o) -def jsonfit(stopping_object, pdf_object, tr_chi2, vl_chi2, true_chi2, timing): +def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, stop_epoch, timing): """Generates a dictionary containing all relevant metadata for the fit Parameters ---------- - stopping_object: n3fit.stopping.Stopping + replica_status: n3fit.stopping.ReplicaBest a stopping.Validation object pdf_object: n3fit.vpinterface.N3PDF N3PDF object constructed from the pdf_model @@ -114,6 +126,8 @@ def jsonfit(stopping_object, pdf_object, tr_chi2, vl_chi2, true_chi2, timing): chi2 for the validation true_chi2: float chi2 for the exp (unreplica'd data) + epoch_stop: int + epoch at which the stopping stopped (not the one for the best fit!) timing: dict dictionary of the timing of the different events that happened """ @@ -121,12 +135,12 @@ def jsonfit(stopping_object, pdf_object, tr_chi2, vl_chi2, true_chi2, timing): # Generate preprocessing information all_info["preprocessing"] = pdf_object.get_preprocessing_factors() # .fitinfo-like info - all_info["stop_epoch"] = stopping_object.stop_epoch - all_info["best_epoch"] = stopping_object.e_best_chi2 + all_info["stop_epoch"] = stop_epoch + all_info["best_epoch"] = replica_status.best_epoch all_info["erf_tr"] = tr_chi2 all_info["erf_vl"] = vl_chi2 all_info["chi2"] = true_chi2 - all_info["pos_state"] = stopping_object.positivity_status() + all_info["pos_state"] = replica_status.positivity_status all_info["arc_lengths"] = pdf_object.compute_arclength().tolist() all_info["integrability"] = pdf_object.integrability_numbers().tolist() all_info["timing"] = timing diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 570eb11456..1782ece969 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -18,8 +18,8 @@ class DIS(Observable): the incoming pdf. The fktable is expected to be rank 3 (ndata, xgrid, flavours) - while the input pdf is also rank 3 where the first dimension is the batch dimension - (1, xgrid, flavours) + while the input pdf is rank 4 where the first dimension is the batch dimension + and the last dimension the number of replicas being fitted (1, xgrid, flavours, replicas) """ def gen_mask(self, basis): @@ -50,12 +50,12 @@ def call(self, pdf): Parameters ---------- pdf: backend tensor - rank 3 tensor (batch_size, xgrid, flavours) + rank 4 tensor (batch_size, xgrid, flavours, replicas) Returns ------- result: backend tensor - rank 1 tensor (batchsize, ndata) + rank 3 tensor (batchsize, replicas, ndata) """ # DIS never needs splitting if self.splitting is not None: diff --git a/n3fit/src/n3fit/layers/DY.py b/n3fit/src/n3fit/layers/DY.py index 0cacff7897..5d714731ea 100644 --- a/n3fit/src/n3fit/layers/DY.py +++ b/n3fit/src/n3fit/layers/DY.py @@ -30,12 +30,12 @@ def call(self, pdf_raw): Parameters ---------- pdf_in: tensor - rank 3 tensor (batchsize, xgrid, flavours) + rank 4 tensor (batchsize, xgrid, flavours, replicas) Returns ------- results: tensor - rank 2 tensor (batchsize, ndata) + rank 3 tensor (batchsize, replicas, ndata) """ # Hadronic observables might need splitting of the input pdf in the x dimension # so we have 3 different paths for this layer @@ -60,5 +60,5 @@ def call(self, pdf_raw): results.append(res) # the masked convolution removes the batch dimension - ret = self.operation(results) + ret = op.transpose(self.operation(results)) return op.batchit(ret) diff --git a/n3fit/src/n3fit/layers/Rotations.py b/n3fit/src/n3fit/layers/Rotations.py index 0933d0d0d0..c5f08a1a49 100644 --- a/n3fit/src/n3fit/layers/Rotations.py +++ b/n3fit/src/n3fit/layers/Rotations.py @@ -64,9 +64,9 @@ class FkRotation(MetaLayer): # TODO: Generate a rotation matrix in the input and just do tf.tensordot in call # the matrix should be: (8, 14) so that we can just do tf.tensordot(pdf, rotmat, axes=1) # i.e., create the matrix and inherit from the Rotation layer above - def __init__(self, output_dim=14, **kwargs): + def __init__(self, output_dim=14, name="evolution", **kwargs): self.output_dim = output_dim - super().__init__(**kwargs, name="evolution") + super().__init__(name=name, **kwargs) def call(self, pdf_raw): # Transpose the PDF so that the flavour index is the first one diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py new file mode 100644 index 0000000000..78b7f3e0ea --- /dev/null +++ b/n3fit/src/n3fit/layers/losses.py @@ -0,0 +1,143 @@ +""" + Module containg the losses to be apply to the models as layers + + The layer take the input from the model and acts on it producing a score function. + For instance, in the case of the chi2 (``LossInvcovmat``) the function takes only + the prediction of the model and, during instantiation, took the real data to compare with + and the covmat. + +""" +import numpy as np +from n3fit.backends import MetaLayer +from n3fit.backends import operations as op + + +class LossInvcovmat(MetaLayer): + """ + Loss function such that: + L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} + + Takes as argument the inverse of the covmat and the target data. + It also takes an optional argument to mask part of the predictions + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> C = np.random.rand(5,5) + >>> data = np.random.rand(1, 1, 5) + >>> pred = np.random.rand(1, 1, 5) + >>> invC = np.linalg.inv( C @ C.T) + >>> loss_f = losses.LossInvcovmat(invC, data) + >>> loss_f(pred).shape == 1 + True + """ + + def __init__(self, invcovmat, y_true, mask=None, **kwargs): + # If we have a diagonal matrix, padd with 0s and hope it's not too heavy on memory + if len(invcovmat.shape) == 1: + invcovmat = np.diag(invcovmat) + self.invcovmat = op.numpy_to_tensor(invcovmat) + self.y_true = op.numpy_to_tensor(y_true) + if mask is None or all(mask): + self.mask = None + else: + mask = np.array(mask, dtype=np.float32).reshape(1,1,-1) + self.mask = op.numpy_to_tensor(mask) + super().__init__(**kwargs) + + def call(self, y_pred, **kwargs): + tmp = self.y_true - y_pred + if self.mask is not None: + tmp = op.op_multiply([tmp, self.mask]) + res = op.einsum("bri, ij, brj -> r", tmp, self.invcovmat, tmp) + return res + + +class LossLagrange(MetaLayer): + """ + Abstract loss function to apply lagrange multipliers to a model. + + L = \lambda * f(y) + + The form of f(y) is given by modifying the ``apply_loss`` method. + It is possible to modify how the multiplication of the lambda factor is implemented + by modifying the ``apply_multiplier`` method. + + The (non trainable) weight containing the multiplier is named ``lagMult``. + """ + + def __init__(self, c=1.0, **kwargs): + self._initial_multiplier = c + super().__init__(**kwargs) + + def build(self, input_shape): + multiplier = MetaLayer.init_constant(self._initial_multiplier) + self.kernel = self.builder_helper("lagMult", (1,), multiplier, trainable=False) + super().build(input_shape) + + def apply_multiplier(self, y): + return self.kernel * y + + def apply_loss(self, y): + return y + + def call(self, y_pred, **kwargs): + y = self.apply_multiplier(y_pred) + return self.apply_loss(y) + + +class LossPositivity(LossLagrange): + """ + Returns L = \lambda*elu(y_pred) + + The positivity loss is computed by inverting the sign of the + datapoints and then applying the elu function, this function is + f(x) = x if x > 0 + f(x) = alpha * (e^{x} - 1) if x < 0 + This is done to avoid a big discontinuity in the derivative at 0 when + the lagrange multiplier is very big. + In practice this function can produce results in the range (-alpha, inf) + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> pred = np.random.rand(1, 1, 5) + >>> alpha = 1e-7 + >>> c = 1e8 + >>> loss_f = losses.LossPositivity(c=c, alpha=alpha) + >>> loss_f(pred) == -5*alpha + True + >>> loss_f(-pred) > c + True + """ + + def __init__(self, alpha=1e-7, **kwargs): + self.alpha = alpha + super().__init__(**kwargs) + + def apply_loss(self, y_pred): + loss = op.backend_function("elu", -y_pred, alpha=self.alpha) + # Sum over the batch and the datapoints + return op.sum(loss, axis=[0, -1]) + + +class LossIntegrability(LossLagrange): + """ + Returns L = (y_pred)*(y_pred) + + Example + ------- + >>> import numpy as np + >>> from n3fit.layers import losses + >>> pred = np.random.rand(1, 1, 5) + >>> loss_f = losses.LossIntegrability(c=1e2) + >>> loss_f(pred) > 0 + True + """ + + def apply_loss(self, y_pred): + y = op.backend_function("square", y_pred) + # Sum over the batch and the datapoints + return op.sum(y, axis=[0, -1]) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 03857e4fbd..fa0c541cef 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -7,18 +7,77 @@ # pdfNN_layer_generator: Generates the PDF NN layer to be fitted """ +from dataclasses import dataclass +import numpy as np + import n3fit.msr as msr_constraints -from n3fit.layers import DIS, DY, Mask, ObsRotation +from n3fit.layers import DIS, DY, Mask, ObsRotation, losses from n3fit.layers import Preprocessing, FkRotation, FlavourToEvolution from n3fit.backends import MetaModel, Input -from n3fit.backends import operations -from n3fit.backends import losses +from n3fit.backends import operations as op from n3fit.backends import MetaLayer, Lambda from n3fit.backends import base_layer_selector, regularizer_selector -def observable_generator(spec_dict, positivity_initial=1.0, integrability=False): # pylint: disable=too-many-locals +@dataclass +class ObservableWrapper: + """Wrapper to generate the observable layer once the PDF model is prepared + It can take normal datasets or Lagrange-multiplier-like datasets + (such as positivity or integrability) + """ + + name: str + observables: list + dataset_xsizes: list + invcovmat: np.array = None + multiplier: float = 1.0 + integrability: bool = False + positivity: bool = False + data: np.array = None + rotation: ObsRotation = None # only used for diagonal covmat + + def _generate_loss(self, mask=None): + """Generates the corresponding loss function depending on the values the wrapper + was initialized with""" + if self.invcovmat is not None: + loss = losses.LossInvcovmat(self.invcovmat, self.data, mask, name=self.name) + elif self.positivity: + loss = losses.LossPositivity(name=self.name, c=self.multiplier) + elif self.integrability: + loss = losses.LossIntegrability(name=self.name, c=self.multiplier) + return loss + + def _generate_experimental_layer(self, pdf): + """ Generates the experimental layer from the PDF """ + # First split the layer into the different datasets (if needed!) + if len(self.dataset_xsizes) > 1: + splitting_layer = op.as_layer( + op.split, + op_args=[self.dataset_xsizes], + op_kwargs={"axis": 1}, + name=f"{self.name}_split", + ) + split_pdf = splitting_layer(pdf) + else: + split_pdf = [pdf] + # Every obs gets its share of the split + output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, self.observables)] + # Concatenate all datasets (so that experiments are one single entity) + ret = op.concatenate(output_layers, axis=2) + if self.rotation is not None: + ret = self.rotation(ret) + return ret + + def __call__(self, pdf_layer, mask=None): + loss_f = self._generate_loss(mask) + experiment_prediction = self._generate_experimental_layer(pdf_layer) + return loss_f(experiment_prediction) + + +def observable_generator( + spec_dict, positivity_initial=1.0, integrability=False +): # pylint: disable=too-many-locals """ This function generates the observable model for each experiment. These are models which takes as input a PDF tensor (1 x size_of_xgrid x flavours) and outputs @@ -54,11 +113,9 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) a dictionary with: - `inputs`: input layer - `output`: output layer (unmasked) - - `loss` : loss function (unmasked) - `output_tr`: output layer (training) - - `loss_tr` : loss function (training) - `output_vl`: output layer (validation) - - `loss_vl` : loss function (validation) + - `experiment_xsize` : int (size of the output array) """ spec_name = spec_dict["name"] dataset_xsizes = [] @@ -92,10 +149,7 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) operation_name, name=f"dat_{dataset_name}", ) - if spec_dict["positivity"]: - obs_layer_ex = obs_layer_tr - obs_layer_vl = obs_layer_tr - else: + if not spec_dict["positivity"]: obs_layer_ex = Obs_Layer( dataset_dict["fktables"], dataset_dict["ex_fktables"], @@ -108,6 +162,8 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) operation_name, name=f"val_{dataset_name}", ) + else: + obs_layer_ex = obs_layer_vl = None # Data transformation might need access to the full array of output data # therefore the validation and training layers should point to the full exp @@ -129,119 +185,71 @@ def observable_generator(spec_dict, positivity_initial=1.0, integrability=False) model_obs_vl.append(obs_layer_vl) model_obs_ex.append(obs_layer_ex) - # Prepare a concatenation as experiments are one single entity formed by many datasets - def gen_concat(name): - return operations.as_layer(operations.concatenate, op_kwargs={"axis": 1}, name=name) - - # Tensorflow operations have ugly name, - # we want the final observables to be named just {spec_name} (with'val/exp' if needed) - tr_name = spec_name - vl_name = f"{spec_name}_val" - ex_name = f"{spec_name}_exp" - concat_ex = gen_concat(ex_name) - # For data transformation all concatenations are the same - if spec_dict.get("data_transformation") is None: - concat_tr = gen_concat(tr_name) - concat_vl = gen_concat(vl_name) - else: - concat_tr = concat_ex - concat_vl = concat_ex - - # creating the experiment as a model turns out to bad for performance - def experiment_layer(pdf, model_obs=model_obs_ex, concat=concat_ex, rotation=None, datasets_out=None): - """ By default works with the experiment observable """ - output_layers = [] - # First split the pdf layer into the different datasets if needed - if len(dataset_xsizes) > 1: - splitting_layer = operations.as_layer( - operations.split, - op_args=[dataset_xsizes], - op_kwargs={"axis": 1}, - name=f"{spec_name}_split", - ) - split_pdf = splitting_layer(pdf) - else: - split_pdf = [pdf] - # every obs gets its share of the split - for partial_pdf, obs in zip(split_pdf, model_obs): - obs_output = obs(partial_pdf) - if datasets_out and obs.name[4:] in datasets_out: - mask_out = Mask(c=0.0, name=f"zero_{obs.name}") - obs_output = mask_out(obs_output) - output_layers.append(obs_output) - # Concatenate all datasets as experiments are one single entity if needed - ret = concat(output_layers) - if rotation is not None: - ret = rotation(ret) - return ret - - # Now create the model for this experiment full_nx = sum(dataset_xsizes) if spec_dict["positivity"]: - out_mask = Mask( - c=positivity_initial, - axis=1, - name=spec_name, + out_positivity = ObservableWrapper( + spec_name, + model_obs_tr, + dataset_xsizes, + multiplier=positivity_initial, + positivity=not integrability, + integrability=integrability, ) - def out_positivity(pdf_layer, datasets_out=None): - exp_result = experiment_layer(pdf_layer) - return out_mask(exp_result) - - if integrability: - loss = losses.l_integrability() - else: - loss = losses.l_positivity() - layer_info = { "inputs": model_inputs, "output_tr": out_positivity, - "loss_tr": loss, "experiment_xsize": full_nx, } return layer_info - invcovmat_tr = spec_dict["invcovmat"] - invcovmat_vl = spec_dict["invcovmat_vl"] - invcovmat = spec_dict["invcovmat_true"] - # Generate the loss function and rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: + # TODO: I'm asuming that the diagonal covmat will work ootb, check # The rotation is the last layer so it should carry The Name - obsrot_tr = ObsRotation(spec_dict.get("data_transformation"), name=tr_name) - obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl"), name=vl_name) - loss_tr = losses.l_diaginvcovmat(invcovmat_tr) - loss_vl = losses.l_diaginvcovmat(invcovmat_vl) + obsrot_tr = ObsRotation(spec_dict.get("data_transformation")) + obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl")) else: obsrot_tr = None obsrot_vl = None - loss_tr = losses.l_invcovmat(invcovmat_tr) - # TODO At this point we need to intercept the data and compile the loss with it - # then the validation must have a list of None as an output - loss_vl = losses.l_invcovmat(invcovmat_vl) - loss = losses.l_invcovmat(invcovmat) - - def out_tr(pdf_layer, datasets_out=None): - exp_result = experiment_layer( - pdf_layer, model_obs=model_obs_tr, concat=concat_tr, datasets_out=datasets_out, rotation=obsrot_tr - ) - return exp_result - def out_vl(pdf_layer, datasets_out=None): - exp_result = experiment_layer( - pdf_layer, model_obs=model_obs_vl, concat=concat_vl, datasets_out=datasets_out, rotation=obsrot_vl - ) - return exp_result + # Prepare the inverse covmats for each of the loss functions + # (that are only instantiated when the output layer is created) + invcovmat_tr = spec_dict["invcovmat"] + invcovmat_vl = spec_dict["invcovmat_vl"] + invcovmat = spec_dict["invcovmat_true"] + + out_tr = ObservableWrapper( + spec_name, + model_obs_tr, + dataset_xsizes, + invcovmat=invcovmat_tr, + data=spec_dict["expdata"], + rotation=obsrot_tr, + ) + out_vl = ObservableWrapper( + f"{spec_name}_val", + model_obs_vl, + dataset_xsizes, + invcovmat=invcovmat_vl, + data=spec_dict["expdata_vl"], + rotation=obsrot_vl, + ) + out_exp = ObservableWrapper( + f"{spec_name}_exp", + model_obs_ex, + dataset_xsizes, + invcovmat=invcovmat, + data=spec_dict["expdata_true"], + rotation=None, + ) layer_info = { "inputs": model_inputs, - "output": experiment_layer, - "loss": loss, + "output": out_exp, "output_tr": out_tr, - "loss_tr": loss_tr, "output_vl": out_vl, - "loss_vl": loss_vl, "experiment_xsize": full_nx, } @@ -360,6 +368,7 @@ def pdfNN_layer_generator( regularizer=None, regularizer_args=None, impose_sumrule=False, + parallel_models=1, ): # pylint: disable=too-many-locals """ Generates the PDF model which takes as input a point in x (from 0 to 1) @@ -439,9 +448,10 @@ def pdfNN_layer_generator( Returns ------- - model_pdf: n3fit.backends.MetaModel + pdf_models: list with a number equal to `parallel_models` of type n3fit.backends.MetaModel a model f(x) = y where x is a tensor (1, xgrid, 1) and y a tensor (1, xgrid, out) """ + # Parse the input configuration if nodes is None: nodes = [15, 8] ln = len(nodes) @@ -462,84 +472,97 @@ def pdfNN_layer_generator( raise ValueError( "Number of activation functions does not match number of layers @ model_gen.py" ) - # The number of nodes in the last layer is equal to the number of fitted flavours (== len(flav_info)) last_layer_nodes = nodes[-1] - if layer_type == "dense": - reg = regularizer_selector(regularizer, **regularizer_args) - list_of_pdf_layers = generate_dense_network( - inp, - nodes, - activations, - initializer_name, - seed=seed, - dropout_rate=dropout, - regularizer=reg, - ) - elif layer_type == "dense_per_flavour": - # Define the basis size attending to the last layer in the network - # TODO: this information should come from the basis information - # once the basis information is passed to this class - list_of_pdf_layers = generate_dense_per_flavour_network( - inp, nodes, activations, initializer_name, seed=seed, basis_size=last_layer_nodes, - ) + # Generate the generic layers + + # Prepare the input for the PDF model + placeholder_input = Input(shape=(None, 1), batch_size=1) # If the input is of type (x, logx) # create a x --> (x, logx) layer to preppend to everything if inp == 2: - add_log = Lambda(lambda x: operations.concatenate([x, operations.op_log(x)], axis=-1)) - - def dense_me(x): - """Takes an input tensor `x` and applies all layers - from the `list_of_pdf_layers` in order""" - if inp == 1: - curr_fun = list_of_pdf_layers[0](x) - else: - curr_fun = list_of_pdf_layers[0](add_log(x)) - - for dense_layer in list_of_pdf_layers[1:]: - curr_fun = dense_layer(curr_fun) - return curr_fun - - # Preprocessing layer (will be multiplied to the last of the denses) - preproseed = seed + number_of_layers - layer_preproc = Preprocessing( - input_shape=(1,), - name="pdf_prepro", - flav_info=flav_info, - seed=preproseed, - output_dim=last_layer_nodes - ) - # Basis rotation - basis_rotation = FlavourToEvolution(flav_info=flav_info, fitbasis=fitbasis) + add_log = Lambda(lambda x: op.concatenate([x, op.op_log(x)], axis=-1)) # Evolution layer layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out) + # Basis rotation + basis_rotation = FlavourToEvolution(flav_info=flav_info, fitbasis=fitbasis) - # Apply preprocessing and basis - def layer_fitbasis(x): - ret = operations.op_multiply([dense_me(x), layer_preproc(x)]) - if basis_rotation.is_identity(): - # if we don't need to rotate basis we don't want spurious layers - return ret - return basis_rotation(ret) - - # Rotation layer, changes from the 8-basis to the 14-basis - def layer_pdf(x): - return layer_evln(layer_fitbasis(x)) - - # Prepare the input for the PDF model - placeholder_input = Input(shape=(None, 1), batch_size=1) - - # Impose sumrule if necessary + # Normalization and sum rules if impose_sumrule: - layer_pdf, integrator_input = msr_constraints.msr_impose(layer_fitbasis, layer_pdf, mode=impose_sumrule) + sumrule_layer, integrator_input = msr_constraints.msr_impose(mode=impose_sumrule) model_input = [integrator_input, placeholder_input] else: + sumrule_layer = lambda x: x integrator_input = None model_input = [placeholder_input] - pdf_model = MetaModel(model_input, layer_pdf(placeholder_input), name="PDF") + pdf_models = [] + + # Now we need a trainable network per model to be trained in parallel + for i in range(parallel_models): + layer_seed = seed + i * number_of_layers + if layer_type == "dense": + reg = regularizer_selector(regularizer, **regularizer_args) + list_of_pdf_layers = generate_dense_network( + inp, + nodes, + activations, + initializer_name, + seed=seed, + dropout_rate=dropout, + regularizer=reg, + ) + elif layer_type == "dense_per_flavour": + # Define the basis size attending to the last layer in the network + # TODO: this information should come from the basis information + # once the basis information is passed to this class + list_of_pdf_layers = generate_dense_per_flavour_network( + inp, + nodes, + activations, + initializer_name, + seed=layer_seed, + basis_size=last_layer_nodes, + ) + + def dense_me(x): + """Takes an input tensor `x` and applies all layers + from the `list_of_pdf_layers` in order""" + if inp == 1: + curr_fun = list_of_pdf_layers[0](x) + else: + curr_fun = list_of_pdf_layers[0](add_log(x)) + + for dense_layer in list_of_pdf_layers[1:]: + curr_fun = dense_layer(curr_fun) + return curr_fun + + # Preprocessing layer (will be multiplied to the last of the denses) + preproseed = seed + number_of_layers * (i + 1) + layer_preproc = Preprocessing( + input_shape=(1,), name=f"pdf_prepro_{i}", flav_info=flav_info, seed=preproseed + ) + + # Apply preprocessing and basis + def layer_fitbasis(x): + ret = op.op_multiply([dense_me(x), layer_preproc(x)]) + if basis_rotation.is_identity(): + # if we don't need to rotate basis we don't want spurious layers + return ret + return basis_rotation(ret) + + # Rotation layer, changes from the 8-basis to the 14-basis + def layer_pdf(x): + return layer_evln(layer_fitbasis(x)) + + # Final PDF + final_pdf = sumrule_layer(layer_fitbasis, layer_pdf) + + pdf_model = MetaModel(model_input, final_pdf(placeholder_input), name=f"PDF_{i}") + + pdf_models.append(pdf_model) - return pdf_model + return pdf_models diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index 1f552888f1..20c43804cc 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -5,7 +5,7 @@ import numpy as np from n3fit.layers import xDivide, MSR_Normalization, xIntegrator -from n3fit.backends import operations +from n3fit.backends import operations as op from n3fit.backends import MetaModel @@ -37,51 +37,57 @@ def gen_integration_input(nx): return xgrid, weights_array -def msr_impose(fit_layer, final_pdf_layer, mode='All', verbose=False): +def msr_impose(nx=int(2e3), basis_size=8, mode='All'): """ - This function receives: - - fit_layer: the 8-basis layer of PDF which we fit - - final_layer: the 14-basis which is fed to the fktable - It uses pdf_fit to compute the sum rule and returns a modified version of - the final_pdf layer with a normalisation by which the sum rule is imposed + Generates a function that applies a normalization layer to the fit. + The normalization is computed from the direct output of the NN (so the 7,8-flavours basis) + and it is applied to the input of the fktable (i.e., to the 14-flavours fk-basis). + + Parameters + ---------- + nx: int + number of points for the integration grid + basis_size: int + number of flavours output of the NN """ # 1. Generate the fake input which will be used to integrate - nx = int(2e3) xgrid, weights_array = gen_integration_input(nx) # 2. Prepare the pdf for integration # for that we need to multiply several flavours with 1/x division_by_x = xDivide() - def pdf_integrand(x): - res = operations.op_multiply([division_by_x(x), fit_layer(x)]) - return res - # 3. Now create the integration layer (the layer that will simply integrate, given some weight integrator = xIntegrator(weights_array, input_shape=(nx,)) # 4. Now create the normalization by selecting the right integrations - normalizer = MSR_Normalization(input_shape=(8,), mode=mode) - - # 5. Make the xgrid numpy array into a backend input layer so it can be given - xgrid_input = operations.numpy_to_input(xgrid) - normalization = normalizer(integrator(pdf_integrand(xgrid_input))) - - def ultimate_pdf(x): - return operations.op_multiply_dim([final_pdf_layer(x), normalization]) - - if verbose: - # only_int = integrator(pdf_integrand(xgrid_input)) - # modelito = MetaModel(xgrid_input, only_int) - # result = modelito.predict(x = None, steps = 1) + normalizer = MSR_Normalization(input_shape=(basis_size,), mode=mode) + + # 5. Make the xgrid array into a backend input layer so it can be given to the normalization + xgrid_input = op.numpy_to_input(xgrid) + + # Now prepare a function that takes as input the 8-flavours output of the NN + # and the 14-flavours after the fk rotation and returns a 14-flavours normalized output + # note + TODO: + # the idea was that the normalization should always be applied at the fktable 14-flavours + # and always computed at the output of the NN (in case one would like to compute it differently) + # don't think it is a good idea anymore and should be changed to act only on the output to the fktable + # but will be dealt with in the future. + # fitlayer #final_pdf + def apply_normalization(layer_fitbasis, layer_pdf): + """ + layer_fitbasis: output of the NN + layer_pdf: output for the fktable + """ + pdf_integrand = op.op_multiply([division_by_x(xgrid_input), layer_fitbasis(xgrid_input)]) + normalization = normalizer(integrator(pdf_integrand)) - print(" > > Generating model for the inyection layer which imposes MSR") - check_integration(ultimate_pdf, xgrid_input) + def ultimate_pdf(x): + return layer_pdf(x)*normalization - # Save a reference to xgrid in ultimate_pdf, very useful for debugging - ultimate_pdf.ref_xgrid = xgrid_input + return ultimate_pdf - return ultimate_pdf, xgrid_input + return apply_normalization, xgrid_input def check_integration(ultimate_pdf, integration_input): @@ -93,12 +99,12 @@ def check_integration(ultimate_pdf, integration_input): """ nx = int(1e4) xgrid, weights_array = gen_integration_input(nx) - xgrid_input = operations.numpy_to_input(xgrid) + xgrid_input = op.numpy_to_input(xgrid) multiplier = xDivide(output_dim=14, div_list=range(3, 9)) def pdf_integrand(x): - res = operations.op_multiply([multiplier(x), ultimate_pdf(x)]) + res = op.op_multiply([multiplier(x), ultimate_pdf(x)]) return res modelito = MetaModel([xgrid_input, integration_input], pdf_integrand(xgrid_input)) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index b12e6359ea..dd912d997e 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -75,6 +75,7 @@ def initialize_seeds(replica: list, trvlseed: int, nnseed: int, mcseed: int, gen # Action to be called by valid phys # All information defining the NN should come here in the "parameters" dict +@n3fit.checks.can_run_multiple_replicas @n3fit.checks.check_consistent_basis @n3fit.checks.wrapper_check_NN @n3fit.checks.wrapper_hyperopt @@ -92,6 +93,7 @@ def performfit( hyperopt=None, debug=False, maxcores=None, + parallel_models=1, ): """ This action will (upon having read a validcard) process a full PDF fit for a given replica. @@ -141,6 +143,8 @@ def performfit( activate some debug options maxcores: int maximum number of (logical) cores that the backend should be aware of + parallel_models: int + number of models to be run in parallel """ from n3fit.backends import set_initial_state @@ -239,11 +243,11 @@ def performfit( fitting["fitbasis"], nnseed, debug=debug, - save_weights_each=fitting.get("save_weights_each"), kfold_parameters=kfold_parameters, max_cores=maxcores, model_file=fitting.get("load"), - sum_rules=fitting.get("sum_rules", True) + sum_rules=fitting.get("sum_rules", True), + parallel_models=parallel_models ) # This is just to give a descriptive name to the fit function @@ -297,63 +301,59 @@ def performfit( # After the fit is run we get a 'result' dictionary with the following items: stopping_object = result["stopping_object"] - pdf_model = result["pdf_model"] - true_chi2 = result["loss"] - training = result["training"] - log.info("Total exp chi2: %s", true_chi2) + pdf_models = result["pdf_models"] # Where has the stopping point happened (this is only for debugging purposes) print( """ > > The stopping point has been at: {0} with a loss of {1} which it got at {2}. Stopping degree {3} - Positivity state: {4} + Positivity status: {4} """.format( stopping_object.stop_epoch, stopping_object.vl_chi2, stopping_object.e_best_chi2, stopping_object.stopping_degree, - stopping_object.positivity_status(), + stopping_object.positivity_status ) ) - # Create a pdf instance - pdf_instance = N3PDF(pdf_model, fit_basis=fitting.get("basis")) + final_time = stopwatch.stop() + all_training_chi2, all_val_chi2, all_exp_chi2 = the_model_trainer.evaluate(stopping_object) - # Generate the writer wrapper - writer_wrapper = WriterWrapper( - replica_number, - pdf_instance, - stopping_object, - theoryid.get_description().get("Q0") ** 2, - stopwatch.stop(), - ) + for i, pdf_model in enumerate(pdf_models): + # Each model goes into its own replica folder + replica_path_set = replica_path / f"replica_{replica_number + i}" - # Now write the data down - training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) - writer_wrapper.write_data( - replica_path_set, output_path.name, training_chi2, val_chi2, true_chi2 - ) + # Create a pdf instance + pdf_instance = N3PDF(pdf_model, fit_basis=fitting.get("basis")) + + # Generate the writer wrapper + writer_wrapper = WriterWrapper( + replica_number, + pdf_instance, + stopping_object, # TODO + theoryid.get_description().get("Q0") ** 2, + final_time, + ) + + # Get the right chi2s + training_chi2 = np.take(all_training_chi2, i) + val_chi2 = np.take(all_val_chi2, i) + exp_chi2 = np.take(all_exp_chi2, i) + + # And write the data down + writer_wrapper.write_data( + replica_path_set, output_path.name, training_chi2, val_chi2, exp_chi2 + ) - # Save the weights to some file for the given replica - model_file = fitting.get("save") - if model_file: - model_file_path = replica_path_set / model_file - log.info(" > Saving the weights for future in %s", model_file_path) - # Need to use "str" here because TF 2.2 has a bug for paths objects (fixed in 2.3 though) - pdf_model.save_weights(str(model_file_path), save_format="h5") - - # If the history of weights is active then loop over it - # rewind the state back to every step and write down the results - for step in range(len(stopping_object.history.reloadable_history)): - stopping_object.history.rewind(step) - new_path = output_path / f"history_step_{step}/replica_{replica_number}" - # We need to recompute the experimental chi2 for this point - training_chi2, val_chi2, exp_chi2 = the_model_trainer.evaluate(stopping_object) - writer_wrapper.write_data(new_path, output_path.name, training_chi2, val_chi2, exp_chi2) - - # So every time we want to capture output_path.name and addd a history_step_X - # parallel to the nnfit folder + # Save the weights to some file for the given replica + model_file = fitting.get("save") + if model_file: + model_file_path = replica_path_set / model_file + log.info(" > Saving the weights for future in %s", model_file_path) + # Need to use "str" here because TF 2.2 has a bug for paths objects (fixed in 2.3 though) + pdf_model.save_weights(str(model_file_path), save_format="h5") if tboard is not None: log.info("Tensorboard logging information is stored at %s", log_path) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 6773cd6c1c..94fb7e8dee 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -41,6 +41,7 @@ # Pass/veto keys POS_OK = "POS_PASS" POS_BAD = "POS_VETO" +THRESHOLD_POS = 1e-6 def parse_ndata(all_data): @@ -117,7 +118,7 @@ def parse_losses(history_object, data, suffix="loss"): total_points = 0 total_loss = 0 for exp_name, npoints in data.items(): - loss = np.take(hobj[exp_name + f"_{suffix}"], -1) + loss = np.array(hobj[exp_name + f"_{suffix}"]) dict_chi2[exp_name] = loss / npoints total_points += npoints total_loss += loss @@ -133,259 +134,357 @@ def parse_losses(history_object, data, suffix="loss"): class FitState: """ - Holds the state of the chi2 during the fit. + Holds the state of the chi2 during the fit for all replicas - It holds the necessary information to reload the fit - to a specific point in time if we are interested on reloading - (otherwise the relevant variables stay empty to save memory) + It holds the necessary information to reload the fit + to a specific point in time if we are interested on reloading + (otherwise the relevant variables stay empty to save memory) - Note: the training chi2 is computed before the update of the weights - so it is the chi2 that informed the updated corresponding to this state. - The validation chi2 instead is computed after the update of the weights. + Note: the training chi2 is computed before the update of the weights + so it is the chi2 that informed the updated corresponding to this state. + The validation chi2 instead is computed after the update of the weights. - Parameters - ---------- - all_tr_chi2: dict - Chi2 for all training datasets computed before the update of the weights - all_vl_chi2: dict - Chi2 for all validation datasets computed after the update of the weights - info: dict - Full return state of the Validation model + Parameters + ---------- + training_info: dict + all losses for the training model + validation_info: dict + all losses for the validation model """ - def __init__(self, all_tr_chi2, all_vl_chi2, info): - self.all_tr_chi2 = all_tr_chi2 - self.all_vl_chi2 = all_vl_chi2 - self.info = info - # These two variables are only filled for specific points - # in order to save precious memory, and only when we are - # saving the fit history each X number of epoch - self.weights = None - self.best_epoch = 0 - - def register_weigths(self, weights, best_epoch): - """ Save the current best weights and best_epoch of the fit """ - self.weights = weights - self.best_epoch = best_epoch + vl_ndata = None + tr_ndata = None + vl_suffix = None + + def __init__(self, training_info, validation_info): + if self.vl_ndata is None or self.tr_ndata is None or self.vl_suffix is None: + raise ValueError( + "FitState cannot be instantiated until vl_ndata, tr_ndata and vl_suffix are filled" + ) + self.training = training_info + self.validation = validation_info + self._parsed = False + self._vl_chi2 = None + self._tr_chi2 = None + self._vl_dict = None + self._tr_dict = None @property - def vl_chi2(self): - """ Returns the total validation chi2 """ - return self.all_vl_chi2["total"] + def vl_loss(self): + """Return the total validation loss as it comes from the info dictionaries""" + return self.validation.get("loss") + + @property + def tr_loss(self): + """Return the total validation loss as it comes from the info dictionaries""" + return self.training.get("loss") + + def _parse_chi2(self): + """ + Parses the chi2 from the losses according to the `tr_ndata` and + `vl_ndata` dictionaries of {dataset: n_points} + """ + if self._parsed: + return + if self.training is not None: + self._tr_chi2, self._tr_dict = parse_losses(self.training, self.tr_ndata) + if self.validation is not None: + self._vl_chi2, self._vl_dict = parse_losses( + self.validation, self.vl_ndata, suffix=self.vl_suffix + ) @property def tr_chi2(self): - """ Returns the total training chi2 """ - return self.all_tr_chi2["total"] + self._parse_chi2() + return self._tr_chi2 - def __str__(self): - return f"chi2: tr={self.tr_chi2} vl={self.vl_chi2}" + @property + def vl_chi2(self): + self._parse_chi2() + return self._vl_chi2 + @property + def all_tr_chi2(self): + self._parse_chi2() + return self._tr_dict -class FitHistory: - """ - Keeps a list of FitState items holding the full history of the fit. + @property + def all_vl_chi2(self): + self._parse_chi2() + return self._vl_dict - It also keeps track of the best epoch and the associated weights. + def all_tr_chi2_for_replica(self, r): + """" Return the tr chi2 per dataset for a given replica """ + return {k: np.take(i, r) for k, i in self.all_tr_chi2.items()} - Can be iterated when there are snapshots of the fit being saved. - When iterated it will rewind the fit to each of the point in history - that have been saved. + def all_vl_chi2_for_replica(self, r): + """" Return the vl chi2 per dataset for a given replica """ + return {k: np.take(i, r) for k, i in self.all_vl_chi2.items()} + + def total_partial_tr_chi2(self): + """ Return the tr chi2 summed over replicas per experiment""" + return {k: np.sum(i) for k, i in self.all_tr_chi2.items()} + + def total_partial_vl_chi2(self): + """ Return the vl chi2 summed over replicas per experiment""" + return {k: np.sum(i) for k, i in self.all_tr_chi2.items()} + + def total_tr_chi2(self): + """ Return the total tr chi2 summed over replicas """ + return np.sum(self.tr_chi2) + + def total_vl_chi2(self): + """ Return the total vl chi2 summed over replicas """ + return np.sum(self.vl_chi2) + + def __str__(self): + return f"chi2: tr={self.tr_chi2} vl={self.vl_chi2}" - Parameters - ---------- - pdf_model: n3fit.backends.MetaModel - PDF model being trained, used to saved the weights and compute - more - save_weights_each: int - if given, it will save a snapshot of the fit every `save_weights_each` epochs - """ - def __init__(self, pdf_model, save_weights_each=None): +class ReplicaState: + """Extra complication which eventually will be merged with someone else + but it is here only for development.""" + + def __init__(self, pdf_model): self._pdf_model = pdf_model - self._save_weights_each = save_weights_each - # Initialize variables for the history self._weights = None self._best_epoch = None - self._history = [] - self.final_epoch = None - # Initialize variables for the snapshots - self.reloadable_history = [] + self._stop_epoch = None + self._best_vl_chi2 = INITIAL_CHI2 + + def positivity_pass(self): + """ By definition, if we have a ``best_epoch`` then positivity passed """ + if self._best_epoch is None: + return False + else: + return True @property def best_epoch(self): - """ Epoch of the best fit """ + if self._best_epoch is None: + return self.stop_epoch return self._best_epoch - @best_epoch.setter - def best_epoch(self, epoch): - """ Saves the current weight """ + @property + def stop_epoch(self): + return self._stop_epoch + + @property + def best_vl(self): + return float(self._best_vl_chi2) + + @property + def positivity_status(self): + if self.positivity_pass(): + return POS_OK + else: + return POS_BAD + + def register_best(self, chi2, epoch): + """ Register a new best state and some metadata about it """ self._weights = self._pdf_model.get_weights() self._best_epoch = epoch + self._best_vl_chi2 = chi2 + + def reload(self): + """ Reload the weights of the best state """ + if self._weights: + self._pdf_model.set_weights(self._weights) + + def stop_training(self, epoch = None): + """ Stop training this replica if not stopped before """ + if self._pdf_model.trainable: + self._pdf_model.trainable = False + self._stop_epoch = epoch + + +class FitHistory: + """ + Keeps a list of FitState items holding the full history of the fit. + + It also keeps track of the best epoch and the associated weights. + + Can be iterated when there are snapshots of the fit being saved. + When iterated it will rewind the fit to each of the point in history + that have been saved. + + Parameters + ---------- + pdf_models: n3fit.backends.MetaModel + list of PDF models being trained, used to saved the weights + """ + + def __init__(self, pdf_models, tr_ndata, vl_ndata): + # Create a ReplicaState object for all models + # which will hold the best chi2 and weights per replica + self._replicas = [] + for pdf_model in pdf_models: + self._replicas.append(ReplicaState(pdf_model)) + self._iter_replicas = iter(self._replicas) + + if vl_ndata is None: + vl_ndata = tr_ndata + vl_suffix = "loss" + else: + vl_suffix = "val_loss" + # All instances of FitState should use these + FitState.tr_ndata = tr_ndata + FitState.vl_ndata = vl_ndata + FitState.vl_suffix = vl_suffix + + # Save a list of status for the entire fit + self._history = [] + self.final_epoch = None + + @property + def best_epoch(self): + """ Return the best epoch per replica """ + return [i.best_epoch for i in self._replicas] def get_state(self, epoch): """ Get the FitState of the system for a given epoch """ - return self._history[epoch] + try: + return self._history[epoch] + except IndexError as e: + raise ValueError( + f"Tried to get obtain the state for epoch {epoch} when only {len(self._history)} epochs have been saved" + ) from e + + def save_best_replica(self, i, epoch=None): + """Save the state of replica ``i`` as a best fit so far. + If an epoch is given, save the best as the given epoch, otherwise + use the last one + """ + if epoch is None: + epoch = self.final_epoch + loss = self.get_state(epoch).vl_loss[i] + self._replicas[i].register_best(loss, epoch) - def best_state(self): - """ Return the FitState object corresponding to the best fit """ - if self.best_epoch is None: - return None - else: - index = self.best_epoch - best_state = self._history[index] - return best_state + def all_positivity_status(self): + """ Returns whether the positivity passed or not per replica """ + return np.array([i.positivity_status for i in self._replicas]) - def best_vl(self): - """ Returns the chi2 of the best fit - if there was no best fit returns `INITIAL_CHI2` - if there was a problem, returns `TERRIBLE_CHI2` """ - if not self._weights: - return TERRIBLE_CHI2 - best_state = self.best_state() - if best_state: - return best_state.vl_chi2 - else: - return INITIAL_CHI2 - - def best_tr(self): - """ Returns the training chi2 of the best fit - if there are no best fit, returns the last one """ - best_state = self.best_state() - if best_state: - return best_state.tr_chi2 - else: - return self._history[self.final_epoch].tr_chi2 + def all_best_vl_loss(self): + """ Returns the best validation loss for each replica """ + return np.array([i.best_vl for i in self._replicas]) - def register(self, fitstate, epoch): - """ Save a new fitstate and updates the current final epoch - Every `save_weights_each` (if set) saves a snapshot of the current best fit into - the fitstate + def register(self, epoch, training_info, validation_info): + """Save a new fitstate and updates the current final epoch Parameters ---------- - `fitstate` - a fitstate object to save - `epoch` + fitstate: FitState + FitState object + the fitstate of the object to save + epoch: int the current epoch of the fit """ + # Save all the information in a fitstate object + fitstate = FitState(training_info, validation_info) self.final_epoch = epoch self._history.append(fitstate) - if self._save_weights_each: - save_here = (epoch + 1) % self._save_weights_each - if save_here == 0: - fitstate.register_weigths(self._weights, self.best_epoch) - self.reloadable_history.append(fitstate) - - def reload(self, weights=None): - """ Reloads the best fit weights into the model - if there are models to be reloaded - A set of weights can be enforced as an optional argument - """ - if weights is None: - weights = self._weights - if weights: - self._pdf_model.set_weights(weights) + return fitstate + + def stop_training_replica(self, i, e): + """ Stop training replica i in epoch e""" + self._replicas[i].stop_training(e) - def rewind(self, step): - """ Rewind the FitHistory object to the step `step` in the fit + def reload(self): + """Reloads the best fit weights into the model if there are models to be reloaded + Ensure that all replicas have stopped at this point. """ - fitstate = self.reloadable_history[step] - historic_weights = fitstate.weights - self.reload(weights=historic_weights) - self.best_epoch = fitstate.best_epoch - self.final_epoch = (step + 1) * self._save_weights_each - 1 - # -1 because we are saving the epochs starting at 0 + for replica in self._replicas: + replica.stop_training(self.final_epoch) + replica.reload() + + def __next__(self): + return next(self._iter_replicas) class Stopping: """ - Driver of the stopping algorithm + Driver of the stopping algorithm - Note, if the total number of points in the validation dictionary is None, it is assumed - the validation_model actually corresponds to the training model. + Note, if the total number of points in the validation dictionary is None, it is assumed + the validation_model actually corresponds to the training model. - Parameters - ---------- - validation_model: n3fit.backends.MetaModel - the model with the validation mask applied - (and compiled with the validation data and covmat) - all_data_dict: dict - list containg all dictionaries containing all information about - the experiments/validation/regularizers/etc to be parsed by Stopping - pdf_model: n3fit.backends.MetaModel - pdf_model being trained - threshold_positivity: float - maximum value allowed for the sum of all positivity losses - total_epochs: int - total number of epochs - stopping_patience: int - how many epochs to wait for the validation loss to improve - dont_stop: bool - dont care about early stopping - save_weights_each: int - every how many epochs to save a snapshot of the fit + Parameters + ---------- + validation_model: n3fit.backends.MetaModel + the model with the validation mask applied + (and compiled with the validation data and covmat) + all_data_dict: dict + list containg all dictionaries containing all information about + the experiments/validation/regularizers/etc to be parsed by Stopping + pdf_models: list(n3fit.backends.MetaModel) + list of pdf_models being trained + threshold_positivity: float + maximum value allowed for the sum of all positivity losses + total_epochs: int + total number of epochs + stopping_patience: int + how many epochs to wait for the validation loss to improve + dont_stop: bool + dont care about early stopping """ def __init__( self, validation_model, all_data_dicts, - pdf_model, - threshold_positivity=1e-6, + pdf_models, + threshold_positivity=THRESHOLD_POS, total_epochs=0, stopping_patience=7000, threshold_chi2=10.0, dont_stop=False, - save_weights_each=None, ): - # Parse the training, validation and positivity sets from all the input dictionaries - self._tr_ndata, vl_ndata, pos_sets = parse_ndata(all_data_dicts) + # Save the validation object + self._validation = validation_model - # Create the Validation, Positivity and History objects - if vl_ndata is None: - self.validation = Validation(validation_model, self._tr_ndata, no_validation=True) - else: - self.validation = Validation(validation_model, vl_ndata) - self.positivity = Positivity(threshold_positivity, pos_sets) - self.history = FitHistory(pdf_model, save_weights_each=save_weights_each) + # Create the History object + tr_ndata, vl_ndata, pos_sets = parse_ndata(all_data_dicts) + self._history = FitHistory(pdf_models, tr_ndata, vl_ndata) + + # And the positivity checker + self._positivity = Positivity(threshold_positivity, pos_sets) # Initialize internal variables for the stopping + self.n_replicas = len(pdf_models) self.threshold_chi2 = threshold_chi2 + self.stopping_degree = np.zeros(self.n_replicas, dtype=np.int) + self.count = np.zeros(self.n_replicas, dtype=np.int) + self.dont_stop = dont_stop self.stop_now = False self.stopping_patience = stopping_patience - self.stopping_degree = 0 - self.count = 0 self.total_epochs = total_epochs @property def vl_chi2(self): - """ Validation chi2 """ - return self.history.best_vl() - - @property - def tr_chi2(self): - """ Training chi2 """ - return self.history.best_tr() + """ Current validation chi2 """ + validation_info = self._validation.compute_losses() + fitstate = FitState(None, validation_info) + return fitstate.vl_chi2 @property def e_best_chi2(self): - """ Epoch of the best chi2, if there is no best epoch - return the last epoch""" - be = self.history.best_epoch - if be is None: - return self.stop_epoch - return be - + """ Epoch of the best chi2, if there is no best epoch, return last""" + return self._history.best_epoch @property def stop_epoch(self): """ Epoch in which the fit is stopped """ - return self.history.final_epoch + 1 + return self._history.final_epoch + 1 + + @property + def positivity_status(self): + """Returns POS_PASS if positivity passes or veto if it doesn't + for each replica""" + return self._history.all_positivity_status() def evaluate_training(self, training_model): - """ Given the training model, evaluates the + """Given the training model, evaluates the model and parses the chi2 of the training datasets Parameters @@ -399,8 +498,8 @@ def evaluate_training(self, training_model): chi2 of the given ``training_model`` """ training_info = training_model.compute_losses() - tr_chi2, _ = parse_losses(training_info, self._tr_ndata) - return tr_chi2 + fitstate = FitState(training_info, None) + return fitstate.tr_chi2 def monitor_chi2(self, training_info, epoch, print_stats=False): """ @@ -416,81 +515,86 @@ def monitor_chi2(self, training_info, epoch, print_stats=False): Parameters ---------- - `training_info` - the output of a .fit() run - `epoch` - the index of the epoch + training_info: dict + output of a .fit() call, dictionary of the total loss (summed over replicas) for + each experiment + epoch: int + index of the epoch Returns ------- - `pass_ok` + pass_ok: bool true/false according to the status of the run """ - # Step 1. Preprocess the event, count it towards the stopping degree - # parse the training information and check whether it is a good point - tr_chi2, all_tr = parse_losses(training_info, self._tr_ndata) - - if np.isnan(tr_chi2): + # Step 1. Check whether the fit has NaN'd and stop it if so + if np.isnan(training_info["loss"]): log.warning(" > NaN found, stopping activated") - self.stop_now = True - # If we had a good model at any point, reload - self.history.reload() + self.make_stop() return False - self.stopping_degree += self.count - - # Step 2. Check the validation loss at this point - vl_chi2, all_vl = self.validation.loss() + # Step 2. Compute the validation metrics + validation_info = self._validation.compute_losses() - # Step 3. Store information about the run and print stats if asked - fitstate = FitState(all_tr, all_vl, self.validation.state) - self.history.register(fitstate, epoch) + # Step 3. Register the current point in (the) history + fitstate = self._history.register(epoch, training_info, validation_info) if print_stats: self.print_current_stats(epoch, fitstate) # Step 4. Check whether this is a better fit # this means improving vl_chi2 and passing positivity - if self.positivity(fitstate) and vl_chi2 < self.threshold_chi2: - if vl_chi2 < self.history.best_vl(): - # Set the new best - self.history.best_epoch = epoch - # Save stopping info - self.stopping_degree = 0 - # Initialize the counter - self.count = 1 - - # If your patience has ended, prepare for stop - if self.stopping_degree > self.stopping_patience: + # Don't start counting until the chi2 of the validation goes below a certain threshold + # once we start counting, don't bother anymore + passes = self.count | (fitstate.vl_chi2 < self.threshold_chi2) + passes &= fitstate.vl_loss < self._history.all_best_vl_loss() + # And the ones that pass positivity + passes &= self._positivity(fitstate) + + self.stopping_degree += self.count + + # Step 5. loop over the valid indices to check whether the vl improved + for i in np.where(passes)[0]: + self._history.save_best_replica(i) + self.stopping_degree[i] = 0 + self.count[i] = 1 + + stop_replicas = self.count & (self.stopping_degree > self.stopping_patience) + for i in np.where(stop_replicas)[0]: + self.count[i] = 0 + self._history.stop_training_replica(i, epoch) + + # By using the stopping degree we only stop when none of the replicas are improving anymore + if min(self.stopping_degree) > self.stopping_patience: self.make_stop() return True def make_stop(self): - """ Convenience method to set the stop_now flag + """Convenience method to set the stop_now flag and reload the history to the point of the best model if any """ self.stop_now = True - self.history.reload() + self._history.reload() def print_current_stats(self, epoch, fitstate): """ - Prints the last validation and training loss saved + Prints ``fitstate`` training and validation chi2s """ epoch_index = epoch + 1 - tr_loss = fitstate.tr_chi2 - vl_loss = fitstate.vl_chi2 - total_str = f"At epoch {epoch_index}/{self.total_epochs}, total loss: {tr_loss}\n" - - partials = [] - for experiment in self._tr_ndata: - chi2 = fitstate.all_tr_chi2[experiment] - partials.append(f"{experiment}: {chi2:.3f}") - total_str += ", ".join(partials) - - total_str += f"\nValidation loss at this point: {vl_loss}" + tr_chi2 = fitstate.total_tr_chi2() + vl_chi2 = fitstate.total_vl_chi2() + total_str = f"At epoch {epoch_index}/{self.total_epochs}, total chi2: {tr_chi2}\n" + + # The partial chi2 makes no sense for more than one replica at once: + if self.n_replicas == 1: + partial_tr_chi2 = fitstate.total_partial_tr_chi2() + partials = [] + for experiment, chi2 in partial_tr_chi2.items(): + partials.append(f"{experiment}: {chi2:.3f}") + total_str += ", ".join(partials) + "\n" + total_str += f"Validation chi2 at this point: {vl_chi2}" log.info(total_str) def stop_here(self): - """ Returns the stopping status + """Returns the stopping status If `dont_stop` is set returns always False (i.e., never stop) """ if self.dont_stop: @@ -498,133 +602,64 @@ def stop_here(self): else: return self.stop_now - def positivity_pass(self): - """ Checks whether the positivity loss is below the requested threshold - If there is no best state, the positivity (obv) cannot pass - """ - best_state = self.history.best_state() - if best_state is not None and self.positivity(best_state): - return True - else: - return False + def get_next_replica(self): + """ Return the next ReplicaState object""" + return next(self._history) - def positivity_status(self): - """ Checks whether the positivity loss is below the requested threshold - If there is no best state, the positivity (obv) cannot pass - """ - if self.positivity_pass(): - return POS_OK - else: - return POS_BAD - - def chi2exps_str(self, log_each=100): + def chi2exps_str(self, replica=0, log_each=100): """ Returns a list of log-string with the status of the fit every `log_each` epochs Parameters ---------- - `log_each` + replica: int + which replica are we writing the log for + log_each: int every how many epochs to print the log Returns ------- - `file_list` - a list of string to be printed as `chi2exps.log` + file_list: list(str) + a list of strings to be printed as `chi2exps.log` """ - final_epoch = self.history.final_epoch + final_epoch = self._history.final_epoch file_list = [] for i in range(log_each - 1, final_epoch + 1, log_each): - fitstate = self.history.get_state(i) - all_tr = fitstate.all_tr_chi2 - all_vl = fitstate.all_vl_chi2 + fitstate = self._history.get_state(i) + all_tr = fitstate.all_tr_chi2_for_replica(replica) + all_vl = fitstate.all_vl_chi2_for_replica(replica) # Here it is assumed the validation exp set is always a subset of the training exp set data_list = [] - for exp in self._tr_ndata: - tr_loss = all_tr[exp] + for exp, tr_loss in all_tr.items(): vl_loss = all_vl.get(exp, 0.0) data_str = f"{exp}: {tr_loss} {vl_loss}" data_list.append(data_str) data = "\n".join(data_list) epoch_index = i + 1 - total_tr_loss = fitstate.tr_chi2 - total_vl_loss = fitstate.vl_chi2 strout = f""" Epoch: {epoch_index} {data} -Total: training = {total_tr_loss} validation = {total_vl_loss} """ file_list.append(strout) return file_list -class Validation: - """ - Controls the NNPDF cross-validation algorithm - - The cross-validation refers to the validation loss of the points of the dataset - not used in the fitting. - In general for any points considered here there will accompanying points from the - same dataset being included in the fitting. - - Parameters - ---------- - model: n3fit.backends.MetaModel - the model with the validation mask applied - (and compiled with the validation data and covmat) - """ - - def __init__(self, model, ndata_dict, verbose=False, no_validation=False): - self.model = model - self.state = None - self.verbose = verbose - self.ndata_dict = ndata_dict - self.n_val_exp = len(ndata_dict) - if no_validation: - self.suffix = "loss" - else: - self.suffix = "val_loss" - - def _compute_validation_loss(self): - """ - Evaluates the validation model and returns a tuple (`total_loss`, `vl_dict`) - with the information for the validation loss by experimenet normalized to the - number of points of each experiment - - Returns - ------- - `total_loss` - total vale for the validation loss - `vl_dict` - dictionary containing a map of experiment names and loss - """ - loss_dict = self.model.compute_losses() - self.state = loss_dict - return parse_losses(loss_dict, self.ndata_dict, suffix=self.suffix) - - def loss(self): - """ - Returns a tuple with the validation loss and a - dictionary for the validation loss per experiment - """ - return self._compute_validation_loss() - - class Positivity: """ - Controls the positivity requirements. + Controls the positivity requirements. - In order to check the positivity passes will check the history of the fitting - as the fitting included positivity sets. - If the sum of all positivity sets losses is above a certain value the model is - not accepted and the training continues. + In order to check the positivity passes will check the history of the fitting + as the fitting included positivity sets. + If the sum of all positivity sets losses is above a certain value the model is + not accepted and the training continues. - Parameters - ---------- - threshold_positivity: float - maximum value allowed for the sum of all positivity losses - positivity_sets: list - list of positivity datasets + Parameters + ---------- + threshold_positivity: float + maximum value allowed for the sum of all positivity losses + positivity_sets: list + list of positivity datasets """ def __init__(self, threshold, positivity_sets): @@ -633,29 +668,27 @@ def __init__(self, threshold, positivity_sets): def check_positivity(self, history_object): """ - This function receives a history objects and loops over the - positivity_sets to check the value of the positivity loss. - - If the positivity loss is above the threshold, the positivity fails - otherwise, it passes. - - Parameters - ---------- - history_object: dict - dictionary of entries in the form {'name': loss}, output of a MetaModel .fit() + This function receives a history objects and loops over the + positivity_sets to check the value of the positivity loss. + + If the positivity loss is above the threshold, the positivity fails + otherwise, it passes. + It returns an array booleans which are True if positivity passed + story_object[key_loss] < self.threshold + Parameters + ---------- + history_object: dict + dictionary of entries in the form {'name': loss}, output of a MetaModel .fit() """ + positivity_pass = True for key in self.positivity_sets: key_loss = f"{key}_loss" - # If we are taking the avg when checking the output, we should do so here as well? - positivity_loss = np.take(history_object[key_loss], -1) - if positivity_loss > self.threshold: - return False - # If none of the positivities failed, it passes - return True + positivity_pass &= history_object[key_loss] < self.threshold + return np.array(positivity_pass) def __call__(self, fitstate): """ - Checks whether a given FitState object - passes the positivity requirement + Checks whether a given FitState object + passes the positivity requirement """ - return self.check_positivity(fitstate.info) + return self.check_positivity(fitstate.validation) diff --git a/n3fit/src/n3fit/tests/test_backend.py b/n3fit/src/n3fit/tests/test_backend.py index 2693fd435a..64355a06f1 100644 --- a/n3fit/src/n3fit/tests/test_backend.py +++ b/n3fit/src/n3fit/tests/test_backend.py @@ -6,7 +6,6 @@ import functools import numpy as np from n3fit.backends import operations as op -from n3fit.backends import losses # General parameters DIM = 7 @@ -117,34 +116,3 @@ def test_tensor_product(): def test_sum(): numpy_check(op.sum, np.sum, mode='single') - - -# Tests loss functions -def test_l_invcovmat(): - loss_f = losses.l_invcovmat(INVCOVMAT) - result = loss_f(T1, T2) - y = ARR1 - ARR2 - tmp = np.dot(INVCOVMAT, y) - reference = np.dot(y, tmp) - are_equal(result, reference) - - -def test_l_positivity(): - alpha = 1e-7 - loss_f = losses.l_positivity(alpha=alpha) - result = loss_f(0.0, T1) - - def elu_sum(yarr_in): - """ Applies Exponential Linear Unit - to an array and sums it up """ - yarr = -yarr_in - res = 0.0 - for y in yarr: - if y > 0: - res += y - else: - res += alpha * (np.exp(y) - 1) - return res - - reference = elu_sum(ARR1) - are_equal(result, reference) diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index adfb4809a8..2c3f509405 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -138,6 +138,7 @@ def test_performfit_and_timing(tmp_path): auxiliary_performfit(tmp_path, replica=2, timing=True) +@pytest.mark.skip(reason="Still not implemented in parallel mode") def test_hyperopt(tmp_path): # Prepare the run quickcard = f"hyper-{QUICKNAME}.yml" diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py index 8ea84457da..d27616b830 100644 --- a/n3fit/src/n3fit/tests/test_layers.py +++ b/n3fit/src/n3fit/tests/test_layers.py @@ -163,7 +163,8 @@ def test_DY(): fks = [i['fktable'] for i in fkdicts] obs_layer = layers.DY(fkdicts, fks, ope, nfl=FLAVS) pdf = np.random.rand(XSIZE, FLAVS) - kp = op.numpy_to_tensor(np.expand_dims(pdf, 0)) + # Add batch dimension (0) and replica dimension (-1) + kp = op.numpy_to_tensor(np.expand_dims(pdf, [0,-1])) # generate the n3fit results result_tensor = obs_layer(kp) result = op.evaluate(result_tensor) diff --git a/n3fit/src/n3fit/tests/test_losses.py b/n3fit/src/n3fit/tests/test_losses.py new file mode 100644 index 0000000000..784c9aeed7 --- /dev/null +++ b/n3fit/src/n3fit/tests/test_losses.py @@ -0,0 +1,43 @@ +""" + Test the losses layers +""" +import numpy as np +from n3fit.layers import losses +from n3fit.backends import operations as op +from .test_backend import are_equal, DIM + +ARR1 = np.random.rand(DIM) +ARR2 = np.random.rand(DIM) +C = np.random.rand(DIM, DIM) +INVCOVMAT = np.linalg.inv(C @ C.T) + +# Tests loss functions +def test_l_invcovmat(): + loss_f = losses.LossInvcovmat(INVCOVMAT, ARR1) + # Add a replica and batch dimension to T2 + result = loss_f(np.expand_dims(ARR2, [0, 1])) + y = ARR1 - ARR2 + tmp = np.dot(INVCOVMAT, y) + reference = np.dot(y, tmp) + are_equal(result, reference) + + +def test_l_positivity(): + alpha = 1e-7 + loss_f = losses.LossPositivity(alpha=alpha) + result = loss_f(np.expand_dims(ARR2, [0, 1])) + + def elu_sum(yarr_in): + """Applies Exponential Linear Unit + to an array and sums it up""" + yarr = -yarr_in + res = 0.0 + for y in yarr: + if y > 0: + res += y + else: + res += alpha * (np.exp(y) - 1) + return res + + reference = elu_sum(ARR1) + are_equal(result, reference) diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index dd5077e8c5..b4d4a975b6 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -18,12 +18,14 @@ """ +import logging import numpy as np import numpy.linalg as la from validphys.core import PDF, MCStats from validphys.pdfbases import ALL_FLAVOURS, check_basis from validphys.arclength import integrability_number, arc_lengths +log = logging.getLogger(__name__) # Order of the evolution basis output from n3fit EVOL_LIST = [ "photon", @@ -83,13 +85,23 @@ def get_nn_weights(self): """Outputs all weights of the NN as numpy.ndarrays """ return self.model.get_weights() - def get_preprocessing_factors(self): + def get_preprocessing_factors(self, replica=None): """Loads the preprocessing alpha and beta arrays from the PDF trained model. If a ``fit_basis`` given in the format of ``n3fit`` runcards is given it will be used to generate a new dictionary with the names, the exponent and whether they are trainable otherwise outputs a Nx2 array where [:,0] are alphas and [:,1] betas """ - preprocessing_layer = self.model.get_layer("pdf_prepro") + # If the replica is given, get the requested preprocessing layer + # otherwise, search for any pdf_prepro_X layer within the model + if replica is not None: + preprocessing_layer = self.model.get_layer(f"pdf_prepro_{replica}") + else: + preprocessing_layers = self.model.get_layer_re("pdf_prepro_\d") + if len(preprocessing_layers) != 1: + # We really don't want to fail at this point, but print a warning at least... + log.warning("More than one preprocessing layer found within the model!") + preprocessing_layer = preprocessing_layers[0] + if self.fit_basis is not None: output_dictionaries = [] for d in self.fit_basis: