Merged
Changes from all commits (30 commits)
2a49b60
Move replica loop into generate_nn function
APJansen Dec 8, 2023
9c23fa5
Simplify handling of dropout
APJansen Dec 8, 2023
683e354
Factor out layer_generator in generate_dense_network
APJansen Dec 8, 2023
d02e118
Refactor dense_per_flavor_network
APJansen Dec 8, 2023
f907d7f
Move setting of last nodes to generate_nn
APJansen Dec 8, 2023
aa76bc7
Add constant arguments
APJansen Dec 8, 2023
1ad7960
Add constant arguments
APJansen Dec 8, 2023
7758497
Move dropout to generate_nn
APJansen Dec 8, 2023
2d388d9
Move concatenation of per_flavor layers into generate_nn
APJansen Dec 8, 2023
1ef87dc
Make the two layer generators almost equal
APJansen Dec 8, 2023
806e2c1
remove separate dense and dense_per_flavor functions
APJansen Dec 8, 2023
bdbc3c3
Add documentation.
APJansen Dec 8, 2023
e3f9f0c
Simplify per_flavor layer concatenation
APJansen Dec 8, 2023
3d9070f
Reverse order of loops over replicas and layers
APJansen Dec 8, 2023
c8300c8
Fixes for dropout
APJansen Dec 8, 2023
0cf23f2
Fixes for per_flavour
APJansen Dec 8, 2023
b0a8e3b
Fix issue with copying over nodes for per_flavour layer
APJansen Dec 11, 2023
97d2efe
Fix seeds in per_flavour layer
APJansen Dec 11, 2023
4c4a2d5
Add error for combination of dropout with per_flavour layers
APJansen Dec 11, 2023
2287194
Add basis_size argument to per_flavour layer
APJansen Dec 11, 2023
2f68e3d
Fix model_gen tests to use new generate_nn in favor of now removed ge…
APJansen Dec 11, 2023
4dd1649
Allow for nodes to be a tuple
APJansen Dec 11, 2023
6bd6466
Move dropout, per_flavour check to checks
APJansen Dec 11, 2023
2cd9e52
Clarify layer type check
APJansen Dec 14, 2023
1ae1b84
Clarify naming in nn_generator
APJansen Dec 14, 2023
e7a7cb4
Remove initializer_name argument
APJansen Dec 14, 2023
07c1e7d
clarify comment
APJansen Dec 14, 2023
25b8308
Add comment on shared layers
APJansen Dec 14, 2023
692014b
Rewrite comprehension over replica seeds
APJansen Dec 14, 2023
903c75b
Add check on layer type
APJansen Dec 15, 2023
18 changes: 18 additions & 0 deletions n3fit/src/n3fit/checks.py
@@ -108,12 +108,29 @@ def check_initializer(initializer):
raise CheckError(f"Initializer {initializer} not accepted by {MetaLayer}")


def check_layer_type_implemented(parameters):
"""Checks whether the layer_type is implemented"""
layer_type = parameters.get("layer_type")
implemented_types = ["dense", "dense_per_flavour"]
if layer_type not in implemented_types:
raise CheckError(
f"Layer type {layer_type} not implemented, must be one of {implemented_types}"
)


def check_dropout(parameters):
"""Checks the dropout setup (positive and smaller than 1.0)"""
dropout = parameters.get("dropout")
if dropout is not None and not 0.0 <= dropout <= 1.0:
raise CheckError(f"Dropout must be between 0 and 1, got: {dropout}")

layer_type = parameters.get("layer_type")
if dropout is not None and dropout > 0.0 and layer_type == "dense_per_flavour":
raise CheckError(
"Dropout is not compatible with the dense_per_flavour layer type, "
"please use instead `parameters::layer_type: dense`"
)


def check_tensorboard(tensorboard):
"""Check that the tensorbard callback can be enabled correctly"""
@@ -168,6 +185,7 @@ def wrapper_check_NN(basis, tensorboard, save, load, parameters):
check_consistent_layers(parameters)
check_basis_with_layers(basis, parameters)
check_stopping(parameters)
check_layer_type_implemented(parameters)
check_dropout(parameters)
check_lagrange_multipliers(parameters, "integrability")
check_lagrange_multipliers(parameters, "positivity")
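As a minimal sketch of how the two new checks interact (the parameter dictionaries below are hypothetical stand-ins for the runcard's parameters block, not taken from this PR):

from n3fit.checks import CheckError, check_dropout, check_layer_type_implemented

params = {"layer_type": "dense", "dropout": 0.2}
check_layer_type_implemented(params)  # passes: "dense" is an implemented layer type
check_dropout(params)                 # passes: 0.0 <= 0.2 <= 1.0 and the layer type is dense

params = {"layer_type": "dense_per_flavour", "dropout": 0.2}
check_layer_type_implemented(params)  # passes: "dense_per_flavour" is implemented
try:
    check_dropout(params)
except CheckError as error:
    print(error)  # dropout is not compatible with the dense_per_flavour layer type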
254 changes: 114 additions & 140 deletions n3fit/src/n3fit/model_gen.py
@@ -299,103 +299,6 @@ def observable_generator(
return layer_info


# Network generation functions
def generate_dense_network(
nodes_in: int,
nodes: int,
activations: List[str],
initializer_name: str = "glorot_normal",
seed: int = 0,
dropout_rate: float = 0.0,
regularizer: str = None,
):
"""
Generates a dense network

the dropout rate, if selected, is set
for the next to last layer (i.e., the last layer of the dense network before getting to
the output layer for the basis choice)
"""
list_of_pdf_layers = []
number_of_layers = len(nodes)
if dropout_rate > 0:
dropout_layer = number_of_layers - 2
else:
dropout_layer = -1
for i, (nodes_out, activation) in enumerate(zip(nodes, activations)):
# if we have dropout set up, add it to the list
if dropout_rate > 0 and i == dropout_layer:
list_of_pdf_layers.append(base_layer_selector("dropout", rate=dropout_rate))

# select the initializer and move the seed
init = MetaLayer.select_initializer(initializer_name, seed=seed + i)

# set the arguments that will define the layer
arguments = {
"kernel_initializer": init,
"units": int(nodes_out),
"activation": activation,
"input_shape": (nodes_in,),
"kernel_regularizer": regularizer,
}

layer = base_layer_selector("dense", **arguments)

list_of_pdf_layers.append(layer)
nodes_in = int(nodes_out)
return list_of_pdf_layers


def generate_dense_per_flavour_network(
nodes_in, nodes, activations, initializer_name="glorot_normal", seed=0, basis_size=8
):
"""
For each flavour generates a dense network of the chosen size

"""
list_of_pdf_layers = []
number_of_layers = len(nodes)
current_seed = seed
for i, (nodes_out, activation) in enumerate(zip(nodes, activations)):
initializers = []
for _ in range(basis_size):
# select the initializer and move the seed
initializers.append(MetaLayer.select_initializer(initializer_name, seed=current_seed))
current_seed += 1

# set the arguments that will define the layer
# but careful, the last layer must be nodes = 1
# TODO the mismatch is due to the fact that basis_size
# is set to the number of nodes of the last layer when it should
# come from the runcard
if i == number_of_layers - 1:
nodes_out = 1
arguments = {
"kernel_initializer": initializers,
"units": nodes_out,
"activation": activation,
"input_shape": (nodes_in,),
"basis_size": basis_size,
}

layer = base_layer_selector("dense_per_flavour", **arguments)

if i == number_of_layers - 1:
# For the last layer, apply concatenate
concat = base_layer_selector("concatenate")

def output_layer(ilayer):
result = layer(ilayer)
return concat(result)

list_of_pdf_layers.append(output_layer)
else:
list_of_pdf_layers.append(layer)

nodes_in = int(nodes_out)
return list_of_pdf_layers


def generate_pdf_model(
nodes: List[int] = None,
activations: List[str] = None,
@@ -670,7 +573,6 @@ def pdfNN_layer_generator(
sumrule_layer = lambda x: x

    # Only these layers change from replica to replica:
-    nn_replicas = []
    preprocessing_factor_replicas = []
    for i_replica, replica_seed in enumerate(seed):
        preprocessing_factor_replicas.append(
@@ -682,21 +584,19 @@
                large_x=not subtract_one,
            )
        )
-        nn_replicas.append(
-            generate_nn(
-                layer_type=layer_type,
-                input_dimensions=nn_input_dimensions,
-                nodes=nodes,
-                activations=activations,
-                initializer_name=initializer_name,
-                replica_seed=replica_seed,
-                dropout=dropout,
-                regularizer=regularizer,
-                regularizer_args=regularizer_args,
-                last_layer_nodes=last_layer_nodes,
-                name=f"NN_{i_replica}",
-            )
-        )
+
+    nn_replicas = generate_nn(
+        layer_type=layer_type,
+        nodes_in=nn_input_dimensions,
+        nodes=nodes,
+        activations=activations,
+        initializer_name=initializer_name,
+        replica_seeds=seed,
+        dropout=dropout,
+        regularizer=regularizer,
+        regularizer_args=regularizer_args,
+        last_layer_nodes=last_layer_nodes,
+    )

# Apply NN layers for all replicas to a given input grid
def neural_network_replicas(x, postfix=""):
@@ -780,44 +680,118 @@ def compute_unnormalized_pdf(x, postfix=""):

def generate_nn(
    layer_type: str,
-    input_dimensions: int,
+    nodes_in: int,
    nodes: List[int],
    activations: List[str],
    initializer_name: str,
-    replica_seed: int,
+    replica_seeds: List[int],
    dropout: float,
    regularizer: str,
    regularizer_args: dict,
    last_layer_nodes: int,
-    name: str,
) -> MetaModel:
    """
    Create the part of the model that contains all of the actual neural network
-    layers.
+    layers, for each replica.

    Parameters
    ----------
    layer_type: str
        Type of layer to use. Can be "dense" or "dense_per_flavour".
    nodes_in: int
        Number of nodes in the input layer.
    nodes: List[int]
        Number of nodes in each hidden layer.
    activations: List[str]
        Activation function to use in each hidden layer.
    initializer_name: str
        Name of the initializer to use.
    replica_seeds: List[int]
        List of seeds to use for each replica.
    dropout: float
        Dropout rate to use (if 0, no dropout is used).
    regularizer: str
        Name of the regularizer to use.
    regularizer_args: dict
        Arguments to pass to the regularizer.
    last_layer_nodes: int
        Number of nodes in the last layer.

    Returns
    -------
    nn_replicas: List[MetaModel]
        List of MetaModel objects, one for each replica.
    """
-    common_args = {
-        'nodes_in': input_dimensions,
-        'nodes': nodes,
-        'activations': activations,
-        'initializer_name': initializer_name,
-        'seed': replica_seed,
-    }
-    if layer_type == "dense":
+    nodes_list = list(nodes)  # so we can modify it
+    x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed')
+
+    custom_args = {}
+    if layer_type == "dense_per_flavour":
+        # set the arguments that will define the layer
+        # but careful, the last layer must be nodes = 1
+        # TODO the mismatch is due to the fact that basis_size
+        # is set to the number of nodes of the last layer when it should
+        # come from the runcard
+        nodes_list[-1] = 1
+        basis_size = last_layer_nodes
+        custom_args['basis_size'] = basis_size
+
+        def initializer_generator(seed, i_layer):
+            seed += i_layer * basis_size
+            initializers = [
+                MetaLayer.select_initializer(initializer_name, seed=seed + b)
+                for b in range(basis_size)
+            ]
+            return initializers
+
+    elif layer_type == "dense":
        reg = regularizer_selector(regularizer, **regularizer_args)
-        list_of_pdf_layers = generate_dense_network(
-            **common_args, dropout_rate=dropout, regularizer=reg
-        )
-    elif layer_type == "dense_per_flavour":
-        list_of_pdf_layers = generate_dense_per_flavour_network(
-            **common_args, basis_size=last_layer_nodes
-        )
+        custom_args['regularizer'] = reg
+
+        def initializer_generator(seed, i_layer):
+            seed += i_layer
+            return MetaLayer.select_initializer(initializer_name, seed=seed)
    # First create all the layers...
    # list_of_pdf_layers[d][r] is the layer at depth d for replica r
    list_of_pdf_layers = []
    for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
        layers = [
            base_layer_selector(
                layer_type,
                kernel_initializer=initializer_generator(replica_seed, i_layer),
                units=nodes_out,
                activation=activation,
                input_shape=(nodes_in,),
                **custom_args,
            )
            for replica_seed in replica_seeds
        ]
        list_of_pdf_layers.append(layers)
        nodes_in = int(nodes_out)

    # add dropout as second to last layer
    if dropout > 0:
        dropout_layer = base_layer_selector("dropout", rate=dropout)
        list_of_pdf_layers.insert(-2, dropout_layer)

    # In case of per flavour network, concatenate at the last layer
    if layer_type == "dense_per_flavour":
        concat = base_layer_selector("concatenate")
        # bind each layer as a default argument: a bare closure would late-bind
        # and apply the last replica's layer to every replica
        list_of_pdf_layers[-1] = [
            lambda x, layer=layer: concat(layer(x)) for layer in list_of_pdf_layers[-1]
        ]

    # Apply all layers to the input to create the models
    pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
    for layers in list_of_pdf_layers[1:]:
        # Since some layers (dropout) are shared, we have to treat them separately
        if type(layers) is list:
Comment thread (resolved):
Member: Please, add a comment here.
Collaborator (Author): You mean on why the if statement is needed? I added a comment, it's because dropout is shared between layers. I could also remove the if statement and replace dropout_layer with [dropout_layer for _ in range(num_replicas)] or something.

            pdfs = [layer(x) for layer, x in zip(layers, pdfs)]
        else:
            pdfs = [layers(x) for x in pdfs]

-    # Note: using a Sequential model would be more appropriate, but it would require
-    # creating a MetaSequential model.
-    x = Input(shape=(None, input_dimensions), batch_size=1, name='xgrids_processed')
-    pdf = x
-    for layer in list_of_pdf_layers:
-        pdf = layer(pdf)
-
-    model = MetaModel({'NN_input': x}, pdf, name=name)
-    return model
+    models = [
+        MetaModel({'NN_input': x_input}, pdf, name=f"NN_{i_replica}")
+        for i_replica, pdf in enumerate(pdfs)
+    ]
+    return models
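As a usage sketch of the consolidated generate_nn (the argument values below are illustrative, not from a runcard; assumes an installed n3fit environment):

from n3fit.model_gen import generate_nn

models = generate_nn(
    layer_type="dense",
    nodes_in=2,                # e.g. x and log(x) input features
    nodes=[25, 20, 8],         # two hidden layers plus the output layer
    activations=["tanh", "tanh", "linear"],
    initializer_name="glorot_normal",
    replica_seeds=[0, 1, 2],   # one replica model per seed
    dropout=0.0,
    regularizer=None,
    regularizer_args={},
    last_layer_nodes=8,        # only used by dense_per_flavour
)
# One MetaModel per replica, sharing the same architecture but not the same weights
print([model.name for model in models])  # ['NN_0', 'NN_1', 'NN_2']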