From 5a3a3e80188568a1c14ba305a42d8e14ab807ed2 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Fri, 16 Oct 2020 09:42:34 +0100 Subject: [PATCH 1/9] Remove requirement by vp-setupfit that closuretest and fakedata are specified --- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/scripts/vp_setupfit.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f81ff2acf5..8da506c338 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1400,7 +1400,7 @@ def produce_scale_variation_theories(self, theoryid, point_prescription): @configparser.explicit_node - def produce_filter_data(self, fakedata: bool, theorycovmatconfig=None): + def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None): """Set the action used to filter the data to filter either real or closure data. If the closure data filter is being used and if the theory covariance matrix is not being closure tested then filter diff --git a/validphys2/src/validphys/scripts/vp_setupfit.py b/validphys2/src/validphys/scripts/vp_setupfit.py index 7065334794..cd4d8eab1d 100644 --- a/validphys2/src/validphys/scripts/vp_setupfit.py +++ b/validphys2/src/validphys/scripts/vp_setupfit.py @@ -43,7 +43,6 @@ actions_=[ 'datacuts check_t0pdfset', 'theory check_positivity', - 'datacuts::closuretest::theory::fitting filter', ]) SETUPFIT_PROVIDERS = ['validphys.filters', @@ -145,6 +144,14 @@ def from_yaml(cls, o, *args, **kwargs): if not isinstance(file_content, dict): raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.") + + if file_content.get('closuretest') is not None: + SETUPFIT_FIXED_CONFIG['actions_'].append( + 'datacuts::closuretest::theory::fitting filter') + else: + SETUPFIT_FIXED_CONFIG['actions_'].append( + 'datacuts::theory::fitting filter') + if file_content.get('theorycovmatconfig') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat') From 43bba1f3a1fb7402ee17f40877b5b28d05fbf8bc Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Fri, 16 Oct 2020 13:10:41 +0100 Subject: [PATCH 2/9] Update eff_exponents.iterated_runcard_yaml so it doesn't assume that closuretest namespace is always there --- validphys2/src/validphys/eff_exponents.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index ba1ac4037d..5b64367dbf 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -471,9 +471,10 @@ def iterated_runcard_yaml( fitting_data[seed] = random.randrange(0, 2**32) # Next "closuretest" section of runcard - closuretest_data = filtermap["closuretest"] - if "filterseed" in closuretest_data: - closuretest_data["filterseed"] = random.randrange(0, 2**32) + if "closuretest" in filtermap: + closuretest_data = filtermap["closuretest"] + if "filterseed" in closuretest_data: + closuretest_data["filterseed"] = random.randrange(0, 2**32) # update description if necessary if _updated_description is not None: From 9777ac4e1e78076bf167daa515b3c91dddf524b5 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Fri, 16 Oct 2020 13:37:27 +0100 Subject: [PATCH 3/9] Make n3fit run without closuretest namespace in runcard --- n3fit/src/n3fit/scripts/n3fit_exec.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 91f732d22c..f6c5f5625b 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -22,7 +22,7 @@ N3FIT_FIXED_CONFIG = dict( use_cuts = 'internal', use_t0 = True, - actions_ = ['datacuts::theory::closuretest performfit'] + actions_ = [] ) N3FIT_PROVIDERS = ["n3fit.performfit"] @@ -109,6 +109,14 @@ def from_yaml(cls, o, *args, **kwargs): raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.") + + if file_content.get('closuretest') is not None: + N3FIT_FIXED_CONFIG['actions_'].append( + 'datacuts::theory::closuretest performfit') + else: + N3FIT_FIXED_CONFIG['actions_'].append( + 'datacuts::theory performfit') + file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) From d4968323108fba1c93cc67305ec7a41e1b954c6d Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Tue, 10 Nov 2020 17:47:15 +0000 Subject: [PATCH 4/9] Expand check_fit_isclosure so that it gives useful error messages --- .../validphys/closuretest/closure_checks.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/closuretest/closure_checks.py b/validphys2/src/validphys/closuretest/closure_checks.py index 156e8b7214..f07d607ad8 100644 --- a/validphys2/src/validphys/closuretest/closure_checks.py +++ b/validphys2/src/validphys/closuretest/closure_checks.py @@ -23,8 +23,22 @@ def check_use_fitcommondata(use_fitcommondata): @make_argcheck def check_fit_isclosure(fit): """Check the input fit is a closure test""" - if not fit.as_input()["closuretest"]["fakedata"]: - raise CheckError(f"Specified fit: {fit}, is not a closure test") + fitinfo = fit.as_input() + if not "closuretest" in fitinfo: + raise CheckError( + f"There is no `closuretest` namespace in {fit}'s runcard. " + f"{fit} is therefore not suitable for closure-test studies." + ) + if not "fakedata" in fitinfo["closuretest"]: + raise CheckError( + f"The `fakedata` key does not exist in the `closuretest` namespace of {fit}'s runcard. " + f"{fit} is therefore not suitable for closure-test studies." + ) + if not fitinfo["closuretest"]["fakedata"]: + raise CheckError( + f"The `fakedata` key is not set to `true` in the `closuretest` namespace of {fit}'s runcard. " + f"{fit} is therefore not suitable for closure-test studies." + ) @make_argcheck From 74c1b0d099c23caa6eee557a7709d2b9191a602c Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Wed, 11 Nov 2020 11:22:04 +0000 Subject: [PATCH 5/9] Call check_fit_isclosure in check_fits_areclosures to simplify things --- validphys2/src/validphys/closuretest/closure_checks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/validphys2/src/validphys/closuretest/closure_checks.py b/validphys2/src/validphys/closuretest/closure_checks.py index f07d607ad8..1188cc5ab8 100644 --- a/validphys2/src/validphys/closuretest/closure_checks.py +++ b/validphys2/src/validphys/closuretest/closure_checks.py @@ -66,8 +66,7 @@ def check_fits_same_filterseed(fits): def check_fits_areclosures(fits): """Check all fits are closures""" for fit in fits: - if not fit.as_input()["closuretest"]["fakedata"]: - raise CheckError(f"Specified fit: {fit}, is not a closure test") + check_fit_isclosure.__wrapped__(fit) @make_argcheck From 31be492999ee9fb388e168b86476a84c18e17515 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Wed, 11 Nov 2020 11:55:55 +0000 Subject: [PATCH 6/9] Add extra checks to closure_results.py --- validphys2/src/validphys/closuretest/closure_results.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py index d0bdabf5db..9f30cf15ee 100644 --- a/validphys2/src/validphys/closuretest/closure_results.py +++ b/validphys2/src/validphys/closuretest/closure_results.py @@ -110,6 +110,7 @@ def biases_table( @check_pdf_is_montecarlo +@check_fit_isclosure def bootstrap_bias_experiment( dataset_inputs_results, underlying_dataset_inputs_results, bootstrap_samples=500 ): @@ -202,6 +203,7 @@ def variance_experiment(dataset_inputs_results, fit, use_fitcommondata): return variance_dataset(dataset_inputs_results, fit, use_fitcommondata) +@check_fit_isclosure def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500): """Calculate the variance as in `variance_experiment` but performs bootstrap sample of the estimator. Returns an array of variance for each resample, @@ -371,6 +373,7 @@ def delta_chi2_table( return res +@check_fit_isclosure def fit_underlying_pdfs_summary(fit, fitunderlyinglaw): """Returns a table with a single column for the `fit` with a row indication the PDF used to generate the data and the t0 pdf From 258f480adfe9bc49e28715eb73bc1c6b8ac40de8 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Wed, 9 Dec 2020 10:28:05 +0000 Subject: [PATCH 7/9] Remove unnecessary info from n3fit runcard that fit bot sees, i.e. developing.yml --- n3fit/runcards/developing.yml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/n3fit/runcards/developing.yml b/n3fit/runcards/developing.yml index 79aa438817..341ce8f711 100644 --- a/n3fit/runcards/developing.yml +++ b/n3fit/runcards/developing.yml @@ -110,9 +110,7 @@ fitting: mcseed: 3 load: 'developing_weights.h5' - seed : 9453862133528 # set the seed for the random generator genrep : true # on = generate MC replicas, off = use real data - rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc fitmethod: NGA # Minimization algorithm ngen : 30000 # Maximum number of generations nmutants : 80 # Number of mutants for replica @@ -157,20 +155,6 @@ fitting: - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta : 0 # Delta for look-back stopping - mingen : 0 # Minimum number of generations - window : 500 # Window for moving average - minchi2 : 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear : 200 # Smear for stopping - deltasm : 200 # Delta smear for stopping - rv : 2 # Ratio for validation stopping - rt : 0.5 # Ratio for training stopping - epsilon : 1e-6 # Gradient epsilon - ############################################################ positivity: posdatasets: @@ -186,18 +170,6 @@ integrability: - { dataset: INTEGXT8, poslambda: 1e2 } - { dataset: INTEGXT3, poslambda: 1e2 } -############################################################ -closuretest: - filterseed : 0 # Random seed to be used in filtering data partitions - fakedata : False # on = to use FAKEPDF to generate pseudo-data - fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data - errorsize : 1.0 # uncertainties rescaling - fakenoise : False # on = to add random fluctuations to pseudo-data - rancutprob : 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: False # 0(1) to output training(valiation) chi2 in report - printpdf4gen: False # To print info on PDFs during minimization - ############################################################ lhagrid: nx : 150 From b59b0e9c9f12ba43a7cd90a1e0f80f4dc7f120a2 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Wed, 9 Dec 2020 15:59:57 +0000 Subject: [PATCH 8/9] Remove more unnecessary info from developing.yml --- n3fit/runcards/developing.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/n3fit/runcards/developing.yml b/n3fit/runcards/developing.yml index 341ce8f711..0a5ee3308d 100644 --- a/n3fit/runcards/developing.yml +++ b/n3fit/runcards/developing.yml @@ -110,12 +110,7 @@ fitting: mcseed: 3 load: 'developing_weights.h5' - genrep : true # on = generate MC replicas, off = use real data - fitmethod: NGA # Minimization algorithm - ngen : 30000 # Maximum number of generations - nmutants : 80 # Number of mutants for replica - paramtype: NN - nnodes : [2,5,3,1] + genrep : true # true = generate MC replicas, false = use real data parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [50, 35, 25, 8] From 595ebd44a59d74f5b740260c266b44726adb4ea2 Mon Sep 17 00:00:00 2001 From: Cameron Voisey Date: Mon, 14 Dec 2020 11:40:18 +0000 Subject: [PATCH 9/9] Remove unnecessary fields from n3fit runcards --- n3fit/runcards/Basic_hyperopt.yml | 33 ------------------- n3fit/runcards/Basic_runcard.yml | 28 ---------------- n3fit/runcards/DIS_diagonal_l2reg_example.yml | 32 ------------------ n3fit/runcards/PN3_DIS_example.yml | 32 ------------------ 4 files changed, 125 deletions(-) diff --git a/n3fit/runcards/Basic_hyperopt.yml b/n3fit/runcards/Basic_hyperopt.yml index 69fb2553bc..b686b2029d 100644 --- a/n3fit/runcards/Basic_hyperopt.yml +++ b/n3fit/runcards/Basic_hyperopt.yml @@ -109,14 +109,7 @@ fitting: trvlseed: 1 nnseed: 2 mcseed: 3 - seed: 9453862133528 # set the seed for the random generator genrep: false # true = generate MC replicas, false = use real data - rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - ngen: 30000 # Maximum number of generations - nmutants: 80 # Number of mutants for replica - paramtype: NN - nnodes: [2, 5, 3, 1] parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [35, 25, 8] @@ -155,20 +148,6 @@ fitting: - { fl: t8, pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } - { fl: cp, pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta: 0 # Delta for look-back stopping - mingen: 0 # Minimum number of generations - window: 500 # Window for moving average - minchi2: 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear: 200 # Smear for stopping - deltasm: 200 # Delta smear for stopping - rv: 2 # Ratio for validation stopping - rt: 0.5 # Ratio for training stopping - epsilon: 1e-6 # Gradient epsilon - ############################################################ positivity: posdatasets: @@ -180,18 +159,6 @@ positivity: - {dataset: POSDYD, poslambda: 1e10} - {dataset: POSDYS, poslambda: 1e10} -############################################################ -closuretest: - filterseed: 0 # Random seed to be used in filtering data partitions - fakedata: false # true = to use FAKEPDF to generate pseudo-data - fakepdf: MSTW2008nlo68cl # Theory input for pseudo-data - errorsize: 1.0 # uncertainties rescaling - fakenoise: false # true = to add random fluctuations to pseudo-data - rancutprob: 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: false # 0(1) to output training(valiation) chi2 in report - printpdf4gen: false # To print info on PDFs during minimization - ############################################################ lhagrid: nx: 150 diff --git a/n3fit/runcards/Basic_runcard.yml b/n3fit/runcards/Basic_runcard.yml index af8c8d3b0e..6d0d567e55 100644 --- a/n3fit/runcards/Basic_runcard.yml +++ b/n3fit/runcards/Basic_runcard.yml @@ -47,9 +47,7 @@ fitting: weight_freq: 100 profiling: False - seed : 9453862133528 # set the seed for the random generator genrep : True # true = generate MC replicas, false = use real data - rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] @@ -89,20 +87,6 @@ fitting: - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] } - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] } -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta : 0 # Delta for look-back stopping - mingen : 0 # Minimum number of generations - window : 500 # Window for moving average - minchi2 : 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear : 200 # Smear for stopping - deltasm : 200 # Delta smear for stopping - rv : 2 # Ratio for validation stopping - rt : 0.5 # Ratio for training stopping - epsilon : 1e-6 # Gradient epsilon - ############################################################ positivity: posdatasets: @@ -113,18 +97,6 @@ positivity: integrability: integdatasets: - {dataset: INTEGXT3, poslambda: 1e2} - -############################################################ -closuretest: - filterseed : 0 # Random seed to be used in filtering data partitions - fakedata : False # true = to use FAKEPDF to generate pseudo-data - fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data - errorsize : 1.0 # uncertainties rescaling - fakenoise : False # true = to add random fluctuations to pseudo-data - rancutprob : 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: False # 0(1) to output training(valiation) chi2 in report - printpdf4gen: False # To print info on PDFs during minimization ############################################################ lhagrid: diff --git a/n3fit/runcards/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/DIS_diagonal_l2reg_example.yml index 67563190b4..f392199f07 100644 --- a/n3fit/runcards/DIS_diagonal_l2reg_example.yml +++ b/n3fit/runcards/DIS_diagonal_l2reg_example.yml @@ -66,13 +66,7 @@ fitting: save: False load: False - seed : 9453862133528 # set the seed for the random generator genrep : True # true = generate MC replicas, false = use real data - rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - nmutants : 80 # Number of mutants for replica - paramtype: NN - nnodes : [2,5,3,1] diagonal_basis: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer @@ -112,20 +106,6 @@ fitting: - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta : 0 # Delta for look-back stopping - mingen : 0 # Minimum number of generations - window : 500 # Window for moving average - minchi2 : 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear : 200 # Smear for stopping - deltasm : 200 # Delta smear for stopping - rv : 2 # Ratio for validation stopping - rt : 0.5 # Ratio for training stopping - epsilon : 1e-6 # Gradient epsilon - ############################################################ positivity: posdatasets: @@ -137,18 +117,6 @@ positivity: - { dataset: POSDYD, poslambda: 1e10 } - { dataset: POSDYS, poslambda: 1e10 } -############################################################ -closuretest: - filterseed : 0 # Random seed to be used in filtering data partitions - fakedata : False # true = to use FAKEPDF to generate pseudo-data - fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data - errorsize : 1.0 # uncertainties rescaling - fakenoise : False # true = to add random fluctuations to pseudo-data - rancutprob : 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: False # 0(1) to output training(valiation) chi2 in report - printpdf4gen: False # To print info on PDFs during minimization - ############################################################ lhagrid: nx : 150 diff --git a/n3fit/runcards/PN3_DIS_example.yml b/n3fit/runcards/PN3_DIS_example.yml index 330178c65d..a0921ced5f 100644 --- a/n3fit/runcards/PN3_DIS_example.yml +++ b/n3fit/runcards/PN3_DIS_example.yml @@ -66,13 +66,7 @@ fitting: save: False load: False - seed : 9453862133528 # set the seed for the random generator genrep : True # true = generate MC replicas, false = use real data - rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - nmutants : 80 # Number of mutants for replica - paramtype: NN - nnodes : [2,5,3,1] parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [35, 25, 8] @@ -109,20 +103,6 @@ fitting: - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta : 0 # Delta for look-back stopping - mingen : 0 # Minimum number of generations - window : 500 # Window for moving average - minchi2 : 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear : 200 # Smear for stopping - deltasm : 200 # Delta smear for stopping - rv : 2 # Ratio for validation stopping - rt : 0.5 # Ratio for training stopping - epsilon : 1e-6 # Gradient epsilon - ############################################################ positivity: posdatasets: @@ -134,18 +114,6 @@ positivity: - { dataset: POSDYD, poslambda: 1e10 } - { dataset: POSDYS, poslambda: 1e10 } -############################################################ -closuretest: - filterseed : 0 # Random seed to be used in filtering data partitions - fakedata : False # true = to use FAKEPDF to generate pseudo-data - fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data - errorsize : 1.0 # uncertainties rescaling - fakenoise : False # true = to add random fluctuations to pseudo-data - rancutprob : 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: False # 0(1) to output training(valiation) chi2 in report - printpdf4gen: False # To print info on PDFs during minimization - ############################################################ lhagrid: nx : 150