diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py
index 7b509f0065..885785a268 100644
--- a/n3fit/src/n3fit/checks.py
+++ b/n3fit/src/n3fit/checks.py
@@ -108,12 +108,29 @@ def check_initializer(initializer):
         raise CheckError(f"Initializer {initializer} not accepted by {MetaLayer}")
 
 
+def check_layer_type_implemented(parameters):
+    """Checks whether the layer_type is implemented"""
+    layer_type = parameters.get("layer_type")
+    implemented_types = ["dense", "dense_per_flavour"]
+    if layer_type not in implemented_types:
+        raise CheckError(
+            f"Layer type {layer_type} not implemented, must be one of {implemented_types}"
+        )
+
+
 def check_dropout(parameters):
     """Checks the dropout setup (positive and smaller than 1.0)"""
     dropout = parameters.get("dropout")
     if dropout is not None and not 0.0 <= dropout <= 1.0:
         raise CheckError(f"Dropout must be between 0 and 1, got: {dropout}")
 
+    layer_type = parameters.get("layer_type")
+    if dropout is not None and dropout > 0.0 and layer_type == "dense_per_flavour":
+        raise CheckError(
+            "Dropout is not compatible with the dense_per_flavour layer type, "
+            "please use instead `parameters::layer_type: dense`"
+        )
+
 
 def check_tensorboard(tensorboard):
     """Check that the tensorbard callback can be enabled correctly"""
@@ -168,6 +185,7 @@ def wrapper_check_NN(basis, tensorboard, save, load, parameters):
     check_consistent_layers(parameters)
     check_basis_with_layers(basis, parameters)
     check_stopping(parameters)
+    check_layer_type_implemented(parameters)
     check_dropout(parameters)
     check_lagrange_multipliers(parameters, "integrability")
     check_lagrange_multipliers(parameters, "positivity")
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index ae6ed01182..09f337dbca 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -299,103 +299,6 @@ def observable_generator(
     return layer_info
 
 
-# Network generation functions
-def generate_dense_network(
-    nodes_in: int,
-    nodes: int,
-    activations: List[str],
-    initializer_name: str = "glorot_normal",
-    seed: int = 0,
-    dropout_rate: float = 0.0,
-    regularizer: str = None,
-):
-    """
-    Generates a dense network
-
-    the dropout rate, if selected, is set
-    for the next to last layer (i.e., the last layer of the dense network before getting to
-    the output layer for the basis choice)
-    """
-    list_of_pdf_layers = []
-    number_of_layers = len(nodes)
-    if dropout_rate > 0:
-        dropout_layer = number_of_layers - 2
-    else:
-        dropout_layer = -1
-    for i, (nodes_out, activation) in enumerate(zip(nodes, activations)):
-        # if we have dropout set up, add it to the list
-        if dropout_rate > 0 and i == dropout_layer:
-            list_of_pdf_layers.append(base_layer_selector("dropout", rate=dropout_rate))
-
-        # select the initializer and move the seed
-        init = MetaLayer.select_initializer(initializer_name, seed=seed + i)
-
-        # set the arguments that will define the layer
-        arguments = {
-            "kernel_initializer": init,
-            "units": int(nodes_out),
-            "activation": activation,
-            "input_shape": (nodes_in,),
-            "kernel_regularizer": regularizer,
-        }
-
-        layer = base_layer_selector("dense", **arguments)
-
-        list_of_pdf_layers.append(layer)
-        nodes_in = int(nodes_out)
-    return list_of_pdf_layers
-
-
-def generate_dense_per_flavour_network(
-    nodes_in, nodes, activations, initializer_name="glorot_normal", seed=0, basis_size=8
-):
-    """
-    For each flavour generates a dense network of the chosen size
-
-    """
-    list_of_pdf_layers = []
-    number_of_layers = len(nodes)
-    current_seed = seed
-    for i, (nodes_out, activation) in enumerate(zip(nodes, activations)):
-        initializers = []
-        for _ in range(basis_size):
-            # select the initializer and move the seed
-            initializers.append(MetaLayer.select_initializer(initializer_name, seed=current_seed))
-            current_seed += 1
-
-        # set the arguments that will define the layer
-        # but careful, the last layer must be nodes = 1
-        # TODO the mismatch is due to the fact that basis_size
-        # is set to the number of nodes of the last layer when it should
-        # come from the runcard
-        if i == number_of_layers - 1:
-            nodes_out = 1
-        arguments = {
-            "kernel_initializer": initializers,
-            "units": nodes_out,
-            "activation": activation,
-            "input_shape": (nodes_in,),
-            "basis_size": basis_size,
-        }
-
-        layer = base_layer_selector("dense_per_flavour", **arguments)
-
-        if i == number_of_layers - 1:
-            # For the last layer, apply concatenate
-            concat = base_layer_selector("concatenate")
-
-            def output_layer(ilayer):
-                result = layer(ilayer)
-                return concat(result)
-
-            list_of_pdf_layers.append(output_layer)
-        else:
-            list_of_pdf_layers.append(layer)
-
-        nodes_in = int(nodes_out)
-    return list_of_pdf_layers
-
-
 def generate_pdf_model(
     nodes: List[int] = None,
     activations: List[str] = None,
@@ -670,7 +573,6 @@ def pdfNN_layer_generator(
         sumrule_layer = lambda x: x
 
     # Only these layers change from replica to replica:
-    nn_replicas = []
     preprocessing_factor_replicas = []
     for i_replica, replica_seed in enumerate(seed):
         preprocessing_factor_replicas.append(
@@ -682,21 +584,19 @@
                 large_x=not subtract_one,
             )
         )
-        nn_replicas.append(
-            generate_nn(
-                layer_type=layer_type,
-                input_dimensions=nn_input_dimensions,
-                nodes=nodes,
-                activations=activations,
-                initializer_name=initializer_name,
-                replica_seed=replica_seed,
-                dropout=dropout,
-                regularizer=regularizer,
-                regularizer_args=regularizer_args,
-                last_layer_nodes=last_layer_nodes,
-                name=f"NN_{i_replica}",
-            )
-        )
+
+    nn_replicas = generate_nn(
+        layer_type=layer_type,
+        nodes_in=nn_input_dimensions,
+        nodes=nodes,
+        activations=activations,
+        initializer_name=initializer_name,
+        replica_seeds=seed,
+        dropout=dropout,
+        regularizer=regularizer,
+        regularizer_args=regularizer_args,
+        last_layer_nodes=last_layer_nodes,
+    )
 
     # Apply NN layers for all replicas to a given input grid
     def neural_network_replicas(x, postfix=""):
@@ -780,44 +680,118 @@ def compute_unnormalized_pdf(x, postfix=""):
 
 def generate_nn(
     layer_type: str,
-    input_dimensions: int,
+    nodes_in: int,
     nodes: List[int],
     activations: List[str],
     initializer_name: str,
-    replica_seed: int,
+    replica_seeds: List[int],
     dropout: float,
     regularizer: str,
     regularizer_args: dict,
     last_layer_nodes: int,
-    name: str,
-) -> MetaModel:
+) -> List[MetaModel]:
     """
     Create the part of the model that contains all of the actual neural network
-    layers.
+    layers, for each replica.
+
+    Parameters
+    ----------
+    layer_type: str
+        Type of layer to use. Can be "dense" or "dense_per_flavour".
+    nodes_in: int
+        Number of nodes in the input layer.
+    nodes: List[int]
+        Number of nodes in each hidden layer.
+    activations: List[str]
+        Activation function to use in each hidden layer.
+    initializer_name: str
+        Name of the initializer to use.
+    replica_seeds: List[int]
+        List of seeds to use for each replica.
+    dropout: float
+        Dropout rate to use (if 0, no dropout is used).
+    regularizer: str
+        Name of the regularizer to use.
+    regularizer_args: dict
+        Arguments to pass to the regularizer.
+    last_layer_nodes: int
+        Number of nodes in the last layer.
+
+    Returns
+    -------
+    nn_replicas: List[MetaModel]
+        List of MetaModel objects, one for each replica.
     """
-    common_args = {
-        'nodes_in': input_dimensions,
-        'nodes': nodes,
-        'activations': activations,
-        'initializer_name': initializer_name,
-        'seed': replica_seed,
-    }
-    if layer_type == "dense":
+    nodes_list = list(nodes)  # so we can modify it
+    x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed')
+
+    custom_args = {}
+    if layer_type == "dense_per_flavour":
+        # set the arguments that will define the layer
+        # but careful, the last layer must be nodes = 1
+        # TODO the mismatch is due to the fact that basis_size
+        # is set to the number of nodes of the last layer when it should
+        # come from the runcard
+        nodes_list[-1] = 1
+        basis_size = last_layer_nodes
+        custom_args['basis_size'] = basis_size
+
+        def initializer_generator(seed, i_layer):
+            seed += i_layer * basis_size
+            initializers = [
+                MetaLayer.select_initializer(initializer_name, seed=seed + b)
+                for b in range(basis_size)
+            ]
+            return initializers
+
+    elif layer_type == "dense":
         reg = regularizer_selector(regularizer, **regularizer_args)
-        list_of_pdf_layers = generate_dense_network(
-            **common_args, dropout_rate=dropout, regularizer=reg
-        )
-    elif layer_type == "dense_per_flavour":
-        list_of_pdf_layers = generate_dense_per_flavour_network(
-            **common_args, basis_size=last_layer_nodes
-        )
+        custom_args['regularizer'] = reg
+
+        def initializer_generator(seed, i_layer):
+            seed += i_layer
+            return MetaLayer.select_initializer(initializer_name, seed=seed)
+
+    # First create all the layers...
+    # list_of_pdf_layers[d][r] is the layer at depth d for replica r
+    list_of_pdf_layers = []
+    for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
+        layers = [
+            base_layer_selector(
+                layer_type,
+                kernel_initializer=initializer_generator(replica_seed, i_layer),
+                units=nodes_out,
+                activation=activation,
+                input_shape=(nodes_in,),
+                **custom_args,
+            )
+            for replica_seed in replica_seeds
+        ]
+        list_of_pdf_layers.append(layers)
+        nodes_in = int(nodes_out)
+
+    # add dropout as second to last layer
+    if dropout > 0:
+        dropout_layer = base_layer_selector("dropout", rate=dropout)
+        list_of_pdf_layers.insert(-2, dropout_layer)
+
+    # In case of per flavour network, concatenate at the last layer
+    # (layer=layer binds each layer at definition time, avoiding late-binding closures)
+    if layer_type == "dense_per_flavour":
+        concat = base_layer_selector("concatenate")
+        list_of_pdf_layers[-1] = [lambda x, layer=layer: concat(layer(x)) for layer in list_of_pdf_layers[-1]]
+
+    # Apply all layers to the input to create the models
+    pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
+    for layers in list_of_pdf_layers[1:]:
+        # Since some layers (dropout) are shared, we have to treat them separately
+        if type(layers) is list:
+            pdfs = [layer(x) for layer, x in zip(layers, pdfs)]
+        else:
+            pdfs = [layers(x) for x in pdfs]
 
-    # Note: using a Sequential model would be more appropriate, but it would require
-    # creating a MetaSequential model.
-    x = Input(shape=(None, input_dimensions), batch_size=1, name='xgrids_processed')
-    pdf = x
-    for layer in list_of_pdf_layers:
-        pdf = layer(pdf)
+    models = [
+        MetaModel({'NN_input': x_input}, pdf, name=f"NN_{i_replica}")
+        for i_replica, pdf in enumerate(pdfs)
+    ]
 
-    model = MetaModel({'NN_input': x}, pdf, name=name)
-    return model
+    return models
diff --git a/n3fit/src/n3fit/tests/test_modelgen.py b/n3fit/src/n3fit/tests/test_modelgen.py
index 4cec4574ee..d50b98728a 100644
--- a/n3fit/src/n3fit/tests/test_modelgen.py
+++ b/n3fit/src/n3fit/tests/test_modelgen.py
@@ -5,62 +5,48 @@
 It checks that both the number of layers and the shape of the weights
 of the layers are what is expected
 """
-import numpy as np
-import n3fit.model_gen
-from n3fit.backends import MetaModel
-from n3fit.backends import operations as op
+from n3fit.model_gen import generate_nn
 
 INSIZE = 16
 OUT_SIZES = (4, 3)
 BASIS_SIZE = 3
 
+COMMON_ARGS = {
+    "nodes_in": INSIZE,
+    "nodes": OUT_SIZES,
+    "activations": ["sigmoid", "tanh"],
+    "initializer_name": "glorot_uniform",
+    "replica_seeds": [0],
+    "dropout": 0.0,
+    "regularizer": None,
+    "regularizer_args": {},
+    "last_layer_nodes": BASIS_SIZE,
+}
+
 
 def test_generate_dense_network():
-    nodes_in = INSIZE
-    nodes_out = OUT_SIZES
-    activations = ["sigmoid", "tanh"]
-    layers = n3fit.model_gen.generate_dense_network(nodes_in, nodes_out, activations)
-    arr = np.random.rand(1, INSIZE)
-    input_layer = op.numpy_to_input(arr)
-    curr_layer = input_layer
-    for layer in layers:
-        curr_layer = layer(curr_layer)
-    modelito = MetaModel({"input": input_layer}, curr_layer)
+    nn = generate_nn("dense", **COMMON_ARGS)[0]
+
     # The number of layers should be input layer + len(OUT_SIZES)
-    assert len(modelito.layers) == len(OUT_SIZES) + 1
+    assert len(nn.layers) == len(OUT_SIZES) + 1
     # Check that the number of parameters is as expected
     # We expect 4 weights where the two first ones are
     # (INSIZE, OUT_SIZE[0]) (OUT_SIZE[0],)
     # and the second one
    # (OUT_SIZE[0], OUT_SIZE[1]) (OUT_SIZE[1],)
-    expected_sizes = [
-        (INSIZE, OUT_SIZES[0]),
-        (OUT_SIZES[0],),
-        OUT_SIZES,
-        (OUT_SIZES[1],),
-    ]
-    for weight, esize in zip(modelito.weights, expected_sizes):
+    expected_sizes = [(INSIZE, OUT_SIZES[0]), (OUT_SIZES[0],), OUT_SIZES, (OUT_SIZES[1],)]
+    for weight, esize in zip(nn.weights, expected_sizes):
         assert weight.shape == esize
 
 
 def test_generate_dense_per_flavour_network():
-    nodes_in = INSIZE
-    nodes_out = OUT_SIZES
-    activations = ["sigmoid", "tanh"]
-    layers = n3fit.model_gen.generate_dense_per_flavour_network(
-        nodes_in, nodes_out, activations, basis_size=BASIS_SIZE
-    )
-    arr = np.random.rand(1, INSIZE)
-    input_layer = op.numpy_to_input(arr)
-    curr_layer = input_layer
-    for layer in layers:
-        curr_layer = layer(curr_layer)
-    modelito = MetaModel({"input": input_layer}, curr_layer)
+    nn = generate_nn("dense_per_flavour", **COMMON_ARGS)[0]
+
     # The number of layers should be input + BASIS_SIZE*len(OUT_SIZES) + concatenate
-    assert len(modelito.layers) == BASIS_SIZE * len(OUT_SIZES) + 2
+    assert len(nn.layers) == BASIS_SIZE * len(OUT_SIZES) + 2
     # The shape for this network of denses for flavours will depend on the basis_size
     expected_sizes = []
     expected_sizes += BASIS_SIZE * [(INSIZE, OUT_SIZES[0]), (OUT_SIZES[0],)]
     expected_sizes += BASIS_SIZE * [(OUT_SIZES[0], 1), (1,)]
-    for weight, esize in zip(modelito.weights, expected_sizes):
+    for weight, esize in zip(nn.weights, expected_sizes):
         assert weight.shape == esize
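For context, a minimal sketch (not part of the patch) of how the new runcard checks behave. The parameter dicts below are made-up runcard fragments; `check_dropout` and `check_layer_type_implemented` are called directly here, as `wrapper_check_NN` does in the diff:

```python
from n3fit.checks import CheckError, check_dropout, check_layer_type_implemented

check_layer_type_implemented({"layer_type": "dense"})       # passes silently
check_dropout({"layer_type": "dense", "dropout": 0.2})      # passes silently

try:
    # dropout together with dense_per_flavour is now rejected
    check_dropout({"layer_type": "dense_per_flavour", "dropout": 0.2})
except CheckError as err:
    print(err)
```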
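Similarly, a hedged sketch of the new single-call API that replaces the per-replica loop in `pdfNN_layer_generator`. The architecture values are purely illustrative; only the keyword names come from the patch:

```python
from n3fit.model_gen import generate_nn

# Three replicas in one call: same architecture, per-replica seeds,
# returned as a list of MetaModel objects named NN_0, NN_1, NN_2
nn_replicas = generate_nn(
    layer_type="dense",
    nodes_in=2,                          # illustrative input dimension
    nodes=[25, 20, 8],                   # illustrative hidden/output sizes
    activations=["tanh", "tanh", "linear"],
    initializer_name="glorot_normal",
    replica_seeds=[0, 1, 2],
    dropout=0.0,
    regularizer=None,
    regularizer_args={},
    last_layer_nodes=8,
)
assert len(nn_replicas) == 3
```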
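Finally, a standalone illustration of why the per-flavour concatenation binds `layer` through a default argument (`lambda x, layer=layer: ...`): lambdas created in a comprehension close over the loop variable itself, so without the binding every lambda would end up calling the last replica's layer.

```python
# Late binding: both lambdas see the final value of f (hex)
late = [lambda x: f(x) for f in (str, hex)]
assert [g(255) for g in late] == ["0xff", "0xff"]

# Default-argument binding captures each f at definition time
bound = [lambda x, f=f: f(x) for f in (str, hex)]
assert [g(255) for g in bound] == ["255", "0xff"]
```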