NNPDF · scarrazza · Dec 16, 2020 · Oct 16, 2020 · Oct 16, 2020 · Oct 16, 2020
diff --git a/n3fit/runcards/Basic_hyperopt.yml b/n3fit/runcards/Basic_hyperopt.yml
@@ -109,14 +109,7 @@ fitting:
   trvlseed: 1
   nnseed: 2
   mcseed: 3
-  seed: 9453862133528           # set the seed for the random generator
   genrep: false        # true = generate MC replicas, false = use real data
-  rngalgo: 0        # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
-  fitmethod: NGA    # Minimization algorithm
-  ngen: 30000       # Maximum number of generations
-  nmutants: 80      # Number of mutants for replica
-  paramtype: NN
-  nnodes: [2, 5, 3, 1]
 
   parameters: # This defines the parameter dictionary that is passed to the Model Trainer
     nodes_per_layer: [35, 25, 8]
@@ -155,20 +148,6 @@ fitting:
       - { fl: t8,  pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
       - { fl: cp,  pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }
 
-############################################################
-stopping:
-  stopmethod: LOOKBACK  # Stopping method
-  lbdelta: 0            # Delta for look-back stopping
-  mingen: 0             # Minimum number of generations
-  window: 500           # Window for moving average
-  minchi2: 3.5          # Minimum chi2 
-  minchi2exp: 6.0       # Minimum chi2 for experiments
-  nsmear: 200           # Smear for stopping
-  deltasm: 200          # Delta smear for stopping
-  rv: 2                 # Ratio for validation stopping
-  rt: 0.5               # Ratio for training stopping
-  epsilon: 1e-6         # Gradient epsilon
-
 ############################################################
 positivity:
   posdatasets:
@@ -180,18 +159,6 @@ positivity:
   - {dataset: POSDYD, poslambda: 1e10}
   - {dataset: POSDYS, poslambda: 1e10}
 
-############################################################
-closuretest:
-  filterseed: 0     # Random seed to be used in filtering data partitions
-  fakedata: false     # true = to use FAKEPDF to generate pseudo-data
-  fakepdf: MSTW2008nlo68cl      # Theory input for pseudo-data
-  errorsize: 1.0    # uncertainties rescaling
-  fakenoise: false    # true = to add random fluctuations to pseudo-data
-  rancutprob: 1.0   # Fraction of data to be included in the fit
-  rancutmethod: 0   # Method to select rancutprob data fraction
-  rancuttrnval: false # 0(1) to output training(valiation) chi2 in report
-  printpdf4gen: false # To print info on PDFs during minimization
-
 ############################################################
 lhagrid:
   nx: 150

diff --git a/n3fit/runcards/Basic_runcard.yml b/n3fit/runcards/Basic_runcard.yml
@@ -47,9 +47,7 @@ fitting:
     weight_freq: 100
     profiling: False
 
-  seed     : 9453862133528      # set the seed for the random generator
   genrep   : True     # true = generate MC replicas, false = use real data
-  rngalgo  : 0      # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
 
   parameters: # This defines the parameter dictionary that is passed to the Model Trainer
     nodes_per_layer: [15, 10, 8]
@@ -89,20 +87,6 @@ fitting:
       - { fl: t8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] }
       - { fl: cp,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] }
 
-############################################################
-stopping:
-  stopmethod: LOOKBACK  # Stopping method
-  lbdelta   : 0         # Delta for look-back stopping
-  mingen    : 0         # Minimum number of generations
-  window    : 500       # Window for moving average
-  minchi2   : 3.5       # Minimum chi2
-  minchi2exp: 6.0       # Minimum chi2 for experiments
-  nsmear    : 200       # Smear for stopping
-  deltasm   : 200       # Delta smear for stopping
-  rv        : 2         # Ratio for validation stopping
-  rt        : 0.5       # Ratio for training stopping
-  epsilon   : 1e-6      # Gradient epsilon
-
 ############################################################
 positivity:
   posdatasets:
@@ -113,18 +97,6 @@ positivity:
 integrability:
   integdatasets:
     - {dataset: INTEGXT3,   poslambda: 1e2}
-
-############################################################
-closuretest:
-  filterseed  : 0   # Random seed to be used in filtering data partitions
-  fakedata    : False # true = to use FAKEPDF to generate pseudo-data
-  fakepdf     : MSTW2008nlo68cl # Theory input for pseudo-data
-  errorsize   : 1.0 # uncertainties rescaling
-  fakenoise   : False # true = to add random fluctuations to pseudo-data
-  rancutprob  : 1.0 # Fraction of data to be included in the fit
-  rancutmethod: 0   # Method to select rancutprob data fraction
-  rancuttrnval: False # 0(1) to output training(valiation) chi2 in report
-  printpdf4gen: False # To print info on PDFs during minimization
 
 ############################################################
 lhagrid:

diff --git a/n3fit/runcards/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/DIS_diagonal_l2reg_example.yml
@@ -66,13 +66,7 @@ fitting:
   save: False
   load: False
 
-  seed     : 9453862133528      # set the seed for the random generator
   genrep   : True     # true = generate MC replicas, false = use real data
-  rngalgo  : 0      # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
-  fitmethod: NGA    # Minimization algorithm
-  nmutants : 80     # Number of mutants for replica
-  paramtype: NN
-  nnodes   : [2,5,3,1]
   diagonal_basis: True
 
   parameters: # This defines the parameter dictionary that is passed to the Model Trainer
@@ -112,20 +106,6 @@ fitting:
       - { fl: t8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
       - { fl: cp,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }
 
-############################################################
-stopping:
-  stopmethod: LOOKBACK  # Stopping method
-  lbdelta   : 0         # Delta for look-back stopping
-  mingen    : 0         # Minimum number of generations
-  window    : 500       # Window for moving average
-  minchi2   : 3.5       # Minimum chi2 
-  minchi2exp: 6.0       # Minimum chi2 for experiments
-  nsmear    : 200       # Smear for stopping
-  deltasm   : 200       # Delta smear for stopping
-  rv        : 2         # Ratio for validation stopping
-  rt        : 0.5       # Ratio for training stopping
-  epsilon   : 1e-6      # Gradient epsilon
-
 ############################################################
 positivity:
   posdatasets:
@@ -137,18 +117,6 @@ positivity:
     - { dataset: POSDYD,   poslambda: 1e10 }
     - { dataset: POSDYS,   poslambda: 1e10 }
 
-############################################################
-closuretest:
-  filterseed  : 0   # Random seed to be used in filtering data partitions
-  fakedata    : False # true = to use FAKEPDF to generate pseudo-data
-  fakepdf     : MSTW2008nlo68cl # Theory input for pseudo-data
-  errorsize   : 1.0 # uncertainties rescaling
-  fakenoise   : False # true = to add random fluctuations to pseudo-data
-  rancutprob  : 1.0 # Fraction of data to be included in the fit
-  rancutmethod: 0   # Method to select rancutprob data fraction
-  rancuttrnval: False # 0(1) to output training(valiation) chi2 in report
-  printpdf4gen: False # To print info on PDFs during minimization
-
 ############################################################
 lhagrid:
   nx  : 150

diff --git a/n3fit/runcards/PN3_DIS_example.yml b/n3fit/runcards/PN3_DIS_example.yml
@@ -66,13 +66,7 @@ fitting:
   save: False
   load: False
 
-  seed     : 9453862133528      # set the seed for the random generator
   genrep   : True     # true = generate MC replicas, false = use real data
-  rngalgo  : 0      # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
-  fitmethod: NGA    # Minimization algorithm
-  nmutants : 80     # Number of mutants for replica
-  paramtype: NN
-  nnodes   : [2,5,3,1]
 
   parameters: # This defines the parameter dictionary that is passed to the Model Trainer
     nodes_per_layer: [35, 25, 8]
@@ -109,20 +103,6 @@ fitting:
       - { fl: t8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
       - { fl: cp,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }
 
-############################################################
-stopping:
-  stopmethod: LOOKBACK  # Stopping method
-  lbdelta   : 0         # Delta for look-back stopping
-  mingen    : 0         # Minimum number of generations
-  window    : 500       # Window for moving average
-  minchi2   : 3.5       # Minimum chi2 
-  minchi2exp: 6.0       # Minimum chi2 for experiments
-  nsmear    : 200       # Smear for stopping
-  deltasm   : 200       # Delta smear for stopping
-  rv        : 2         # Ratio for validation stopping
-  rt        : 0.5       # Ratio for training stopping
-  epsilon   : 1e-6      # Gradient epsilon
-
 ############################################################
 positivity:
   posdatasets:
@@ -134,18 +114,6 @@ positivity:
     - { dataset: POSDYD,   poslambda: 1e10 }
     - { dataset: POSDYS,   poslambda: 1e10 }
 
-############################################################
-closuretest:
-  filterseed  : 0   # Random seed to be used in filtering data partitions
-  fakedata    : False # true = to use FAKEPDF to generate pseudo-data
-  fakepdf     : MSTW2008nlo68cl # Theory input for pseudo-data
-  errorsize   : 1.0 # uncertainties rescaling
-  fakenoise   : False # true = to add random fluctuations to pseudo-data
-  rancutprob  : 1.0 # Fraction of data to be included in the fit
-  rancutmethod: 0   # Method to select rancutprob data fraction
-  rancuttrnval: False # 0(1) to output training(valiation) chi2 in report
-  printpdf4gen: False # To print info on PDFs during minimization
-
 ############################################################
 lhagrid:
   nx  : 150

diff --git a/n3fit/runcards/developing.yml b/n3fit/runcards/developing.yml
@@ -110,14 +110,7 @@ fitting:
   mcseed: 3
   load: 'developing_weights.h5'
 
-  seed     : 9453862133528      # set the seed for the random generator
-  genrep   : true      # on = generate MC replicas, off = use real data
-  rngalgo  : 0      # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
-  fitmethod: NGA    # Minimization algorithm
-  ngen     : 30000  # Maximum number of generations
-  nmutants : 80     # Number of mutants for replica
-  paramtype: NN
-  nnodes   : [2,5,3,1]
+  genrep   : true      # true = generate MC replicas, false = use real data
 
   parameters: # This defines the parameter dictionary that is passed to the Model Trainer
     nodes_per_layer: [50, 35, 25, 8]
@@ -157,20 +150,6 @@ fitting:
       - { fl: t8,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
       - { fl: cp,  pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }
 
-############################################################
-stopping:
-  stopmethod: LOOKBACK  # Stopping method
-  lbdelta   : 0         # Delta for look-back stopping
-  mingen    : 0         # Minimum number of generations
-  window    : 500       # Window for moving average
-  minchi2   : 3.5       # Minimum chi2
-  minchi2exp: 6.0       # Minimum chi2 for experiments
-  nsmear    : 200       # Smear for stopping
-  deltasm   : 200       # Delta smear for stopping
-  rv        : 2         # Ratio for validation stopping
-  rt        : 0.5       # Ratio for training stopping
-  epsilon   : 1e-6      # Gradient epsilon
-
 ############################################################
 positivity:
   posdatasets:
@@ -186,18 +165,6 @@ integrability:
     - { dataset: INTEGXT8,   poslambda: 1e2 }
     - { dataset: INTEGXT3,   poslambda: 1e2 }
 
-############################################################
-closuretest:
-  filterseed  : 0   # Random seed to be used in filtering data partitions
-  fakedata    : False # on = to use FAKEPDF to generate pseudo-data
-  fakepdf     : MSTW2008nlo68cl # Theory input for pseudo-data
-  errorsize   : 1.0 # uncertainties rescaling
-  fakenoise   : False # on = to add random fluctuations to pseudo-data
-  rancutprob  : 1.0 # Fraction of data to be included in the fit
-  rancutmethod: 0   # Method to select rancutprob data fraction
-  rancuttrnval: False # 0(1) to output training(valiation) chi2 in report
-  printpdf4gen: False # To print info on PDFs during minimization
-
 ############################################################
 lhagrid:
   nx  : 150

diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py
@@ -22,7 +22,7 @@
 N3FIT_FIXED_CONFIG = dict(
     use_cuts = 'internal',
     use_t0 = True,
-    actions_ = ['datacuts::theory::closuretest performfit']
+    actions_ = []
 )
 
 N3FIT_PROVIDERS = ["n3fit.performfit", "validphys.results"]
@@ -109,6 +109,14 @@ def from_yaml(cls, o, *args, **kwargs):
             raise ConfigError(f"Failed to parse yaml file: {e}")
         if not isinstance(file_content, dict):
             raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.")
+
+        if file_content.get('closuretest') is not None:
+            N3FIT_FIXED_CONFIG['actions_'].append(
+                'datacuts::theory::closuretest performfit')
+        else:
+            N3FIT_FIXED_CONFIG['actions_'].append(
+                'datacuts::theory performfit')
+
         file_content.update(N3FIT_FIXED_CONFIG)
         return cls(file_content, *args, **kwargs)
 

diff --git a/validphys2/src/validphys/closuretest/closure_checks.py b/validphys2/src/validphys/closuretest/closure_checks.py
@@ -23,8 +23,22 @@ def check_use_fitcommondata(use_fitcommondata):
 @make_argcheck
 def check_fit_isclosure(fit):
     """Check the input fit is a closure test"""
-    if not fit.as_input()["closuretest"]["fakedata"]:
-        raise CheckError(f"Specified fit: {fit}, is not a closure test")
+    fitinfo = fit.as_input()
+    if fitinfo.get("closuretest") is None:  # absent or left empty (`closuretest:`), as in from_yaml
+        raise CheckError(
+            f"There is no `closuretest` namespace in {fit}'s runcard. "
+            f"{fit} is therefore not suitable for closure-test studies."
+        )
+    if "fakedata" not in fitinfo["closuretest"]:  # namespace exists but the key is missing
+        raise CheckError(
+            f"The `fakedata` key does not exist in the `closuretest` namespace of {fit}'s runcard. "
+            f"{fit} is therefore not suitable for closure-test studies."
+        )
+    if not fitinfo["closuretest"]["fakedata"]:  # key present but falsy: fit used real data
+        raise CheckError(
+            f"The `fakedata` key is not set to `true` in the `closuretest` namespace of {fit}'s runcard. "
+            f"{fit} is therefore not suitable for closure-test studies."
+        )
 
 
 @make_argcheck
@@ -52,8 +66,7 @@ def check_fits_same_filterseed(fits):
 def check_fits_areclosures(fits):
     """Check all fits are closures"""
     for fit in fits:
-        if not fit.as_input()["closuretest"]["fakedata"]:
-            raise CheckError(f"Specified fit: {fit}, is not a closure test")
+        check_fit_isclosure.__wrapped__(fit)
 
 
 @make_argcheck

diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py
@@ -110,6 +110,7 @@ def biases_table(
 
 
 @check_pdf_is_montecarlo
+@check_fit_isclosure
 def bootstrap_bias_experiment(
     dataset_inputs_results, underlying_dataset_inputs_results, bootstrap_samples=500
 ):
@@ -202,6 +203,7 @@ def variance_experiment(dataset_inputs_results, fit, use_fitcommondata):
     return variance_dataset(dataset_inputs_results, fit, use_fitcommondata)
 
 
+@check_fit_isclosure
 def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500):
     """Calculate the variance as in `variance_experiment` but performs bootstrap
     sample of the estimator. Returns an array of variance for each resample,
@@ -371,6 +373,7 @@ def delta_chi2_table(
     return res
 
 
+@check_fit_isclosure
 def fit_underlying_pdfs_summary(fit, fitunderlyinglaw):
     """Returns a table with a single column for the `fit` with a row indication
     the PDF used to generate the data and the t0 pdf

diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
@@ -1426,7 +1426,7 @@ def produce_scale_variation_theories(self, theoryid, point_prescription):
 
 
     @configparser.explicit_node
-    def produce_filter_data(self, fakedata: bool, theorycovmatconfig=None):
+    def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None):
         """Set the action used to filter the data to filter either real or
         closure data. If the closure data filter is being used and if the
         theory covariance matrix is not being closure tested then filter

diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py
@@ -471,9 +471,10 @@ def iterated_runcard_yaml(
             fitting_data[seed] = random.randrange(0, 2**32)
 
     # Next "closuretest" section of runcard
-    closuretest_data = filtermap["closuretest"]
-    if "filterseed" in closuretest_data:
-        closuretest_data["filterseed"] = random.randrange(0, 2**32)
+    if "closuretest" in filtermap:
+        closuretest_data = filtermap["closuretest"]
+        if "filterseed" in closuretest_data:
+            closuretest_data["filterseed"] = random.randrange(0, 2**32)
 
     # update description if necessary
     if _updated_description is not None:

diff --git a/validphys2/src/validphys/scripts/vp_setupfit.py b/validphys2/src/validphys/scripts/vp_setupfit.py
@@ -43,7 +43,6 @@
     actions_=[
         'datacuts check_t0pdfset',
         'theory check_positivity',
-        'datacuts::closuretest::theory::fitting filter',
     ])
 
 SETUPFIT_PROVIDERS = ['validphys.filters',
@@ -145,6 +144,14 @@ def from_yaml(cls, o, *args, **kwargs):
         if not isinstance(file_content, dict):
             raise ConfigError(f"Expecting input runcard to be a mapping, "
                               f"not '{type(file_content)}'.")
+
+        if file_content.get('closuretest') is not None:
+            SETUPFIT_FIXED_CONFIG['actions_'].append(
+                'datacuts::closuretest::theory::fitting filter')
+        else:
+            SETUPFIT_FIXED_CONFIG['actions_'].append(
+                'datacuts::theory::fitting filter')
+
         if file_content.get('theorycovmatconfig') is not None:
             SETUPFIT_FIXED_CONFIG['actions_'].append(
                 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat')