NNPDF · scarlehoff · Dec 15, 2020 · Dec 16, 2020 · Dec 16, 2020 · Dec 17, 2020
diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst
@@ -9,8 +9,9 @@ In this section we fine-grain the explanation of the different parameters that e
 - :ref:`networkarch-label`
 - :ref:`optimizer-label`
 - :ref:`positivity-label`
-- :ref:`otheroptions-label`
 - :ref:`tensorboard-label`
+- :ref:`parallel-label`
+- :ref:`otheroptions-label`
 
 
 .. _preprocessing-label:
@@ -206,24 +207,6 @@ Threshold :math:`\chi2`
 - ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping.
 
 
-Save and load weights of the model
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. code-block:: yaml
-
-    fitting:
-        save: "weights.h5"
-        load: "weights.h5"
-
-- ``save``: saves the weights of the PDF model in the selected file in the replica folder.
-- ``load``: loads the weights of the PDF model from the selected file.
-
-Since the weights depend only on the architecture of the Neural Network,
-it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments
-and load it in a different runcard and continue the training from there.
-
-While the load file is read as an absolute path, the file to save to will be found
-inside the replica folder.
 
 
 .. _tensorboard-label:
@@ -258,3 +241,54 @@ Logging details can be visualized in the browser with the following command:
 Logging details will include the value of the loss for each experiment over time,
 the values of the weights of the NN,
 as well as a detailed analysis of the amount of time that TensorFlow spent on each operation.
+
+
+.. _parallel-label:
+
+Running fits in parallel
+------------------------
+
+It is possible to run fits in parallel with ``n3fit`` by using the ``parallel_models``
+flag in the runcard (by default the number of ``parallel_models`` is set to 1).
+Running in parallel can be quite hard on memory and it is only advantageous when
+fitting on a GPU, where one can find a speed-up equal to the number of models run
+in parallel (each model being a different replica).
+
+At present it cannot be used together with the ``hyperopt`` module.
+
+
+.. _otheroptions-label:
+
+Other options
+-------------
+
+Threshold :math:`\chi2`
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: yaml
+
+    fitting:
+        parameters:
+            threshold_chi2: 4.0
+
+- ``threshold_chi2``: sets a maximum validation :math:`\chi2` for the stopping to activate. Avoids (too) early stopping.
+
+
+Save and load weights of the model
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: yaml
+
+    fitting:
+        save: "weights.h5"
+        load: "weights.h5"
+
+- ``save``: saves the weights of the PDF model in the selected file in the replica folder.
+- ``load``: loads the weights of the PDF model from the selected file.
+
+Since the weights depend only on the architecture of the Neural Network,
+it is possible to save the weights of a Neural Network trained with one set of hyperparameters and experiments
+and load them in a different runcard to continue the training from there.
+
+While the file to load is read as an absolute path, the file to save to is always
+placed inside the replica folder.
diff --git a/n3fit/runcards/Basic_runcard_parallel.yml b/n3fit/runcards/Basic_runcard_parallel.yml
@@ -0,0 +1,106 @@
+#
+# Configuration file for n3fit
+#
+
+############################################################
+description: Basic runcard
+
+############################################################
+# frac: training fraction
+# ewk: apply ewk k-factors
+# sys: systematics treatment (see systypes)
+dataset_inputs:
+- { dataset: SLACP, frac: 0.5}
+- { dataset: NMC,   frac: 0.5  }
+- { dataset: NMCPD, frac: 0.5 }
+- { dataset: CMSJETS11, frac: 0.5, sys: 10 }
+
+############################################################
+datacuts:
+  t0pdfset     : NNPDF31_nlo_as_0118 # PDF set to generate t0 covmat
+  q2min        : 3.49                # Q2 minimum
+  w2min        : 12.5                # W2 minimum
+  combocuts    : NNPDF31             # NNPDF3.0 final kin. cuts
+  jetptcut_tev : 0                   # jet pt cut for tevatron
+  jetptcut_lhc : 0                   # jet pt cut for lhc
+  wptcut_lhc   : 30.0                # Minimum pT for W pT diff distributions
+  jetycut_tev  : 1e30                # jet rap. cut for tevatron
+  jetycut_lhc  : 1e30                # jet rap. cut for lhc
+  dymasscut_min: 0                   # dy inv.mass. min cut
+  dymasscut_max: 1e30                # dy inv.mass. max cut
+  jetcfactcut  : 1e30                # jet cfact. cut
+
+############################################################
+theory:
+  theoryid: 53        # database id
+
+############################################################
+fitting:
+  trvlseed: 1
+  nnseed: 2
+  mcseed: 3
+
+  genrep: False     # true = generate MC replicas, false = use real data
+
+  parameters: # This defines the parameter dictionary that is passed to the Model Trainer
+    nodes_per_layer: [15, 10, 8]
+    activation_per_layer: ['sigmoid', 'sigmoid', 'linear']
+    initializer: 'glorot_normal'
+    optimizer:
+      optimizer_name: 'RMSprop'
+      learning_rate: 0.01
+      clipnorm: 1.0
+    epochs: 900
+    positivity:
+      multiplier: 1.05 # If the multiplier and/or the initial value is not set,
+      initial: # the poslambda is used instead to compute these values per dataset
+      threshold: 1e-5
+    stopping_patience: 0.30 # fraction of the number of epochs
+    layer_type: 'dense'
+    dropout: 0.0
+    threshold_chi2: 5.0
+
+  # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7)
+  # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7)
+  # EVOLS(QED)= sng=0,g=1,v=2,v8=3,t3=4,t8=5,ds=6,(pht=7)
+  # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7)
+  fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc.
+  basis:
+      # remember to change the name of the PDF according to the fitbasis
+      # pos: True for NN squared
+      # mutsize: mutation size
+      # mutprob: mutation probability
+      # smallx, largex: preprocessing ranges
+      - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False }
+      - { fl: g,   pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False }
+      - { fl: v,   pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False }
+      - { fl: v3,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.21,0.57], largex: [1.35,3.08] }
+      - { fl: v8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True }
+      - { fl: t3,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.37,1.52], largex: [1.74,3.39] }
+      - { fl: t8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] }
+      - { fl: cp,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] }
+
+############################################################
+positivity:
+  posdatasets:
+    - { dataset: POSF2U,   poslambda: 1e6 }  # Positivity Lagrange Multiplier
+    - { dataset: POSFLL,   poslambda: 1e4 }
+
+############################################################
+integrability:
+  integdatasets:
+    - {dataset: INTEGXT3,   poslambda: 1e2}
+
+############################################################
+lhagrid:
+  nx  : 150
+  xmin: 1e-9
+  xmed: 0.1
+  xmax: 1.0
+  nq  : 50
+  qmax: 1e5
+
+############################################################
+debug: True
+maxcores: 8
+parallel_models: 2
diff --git a/n3fit/runcards/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/DIS_diagonal_l2reg_example.yml
@@ -76,7 +76,7 @@ fitting:
     optimizer:
       learning_rate: 1.0
       optimizer_name: 'Adadelta'
-    epochs: 40000
+    epochs: 4000
     positivity:
       multiplier: 1.09
       initial: 10.0