Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions n3fit/runcards/Basic_hyperopt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,7 @@ fitting:
trvlseed: 1
nnseed: 2
mcseed: 3
seed: 9453862133528 # set the seed for the random generator
genrep: false # true = generate MC replicas, false = use real data
rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
fitmethod: NGA # Minimization algorithm
ngen: 30000 # Maximum number of generations
nmutants: 80 # Number of mutants for replica
paramtype: NN
nnodes: [2, 5, 3, 1]

parameters: # This defines the parameter dictionary that is passed to the Model Trainer
nodes_per_layer: [35, 25, 8]
Expand Down Expand Up @@ -155,20 +148,6 @@ fitting:
- { fl: t8, pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
- { fl: cp, pos: False, mutsize: [15], trainable: False, mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }

############################################################
stopping:
stopmethod: LOOKBACK # Stopping method
lbdelta: 0 # Delta for look-back stopping
mingen: 0 # Minimum number of generations
window: 500 # Window for moving average
minchi2: 3.5 # Minimum chi2
minchi2exp: 6.0 # Minimum chi2 for experiments
nsmear: 200 # Smear for stopping
deltasm: 200 # Delta smear for stopping
rv: 2 # Ratio for validation stopping
rt: 0.5 # Ratio for training stopping
epsilon: 1e-6 # Gradient epsilon

############################################################
positivity:
posdatasets:
Expand All @@ -180,18 +159,6 @@ positivity:
- {dataset: POSDYD, poslambda: 1e10}
- {dataset: POSDYS, poslambda: 1e10}

############################################################
closuretest:
filterseed: 0 # Random seed to be used in filtering data partitions
fakedata: false # true = to use FAKEPDF to generate pseudo-data
fakepdf: MSTW2008nlo68cl # Theory input for pseudo-data
errorsize: 1.0 # uncertainties rescaling
fakenoise: false # true = to add random fluctuations to pseudo-data
rancutprob: 1.0 # Fraction of data to be included in the fit
rancutmethod: 0 # Method to select rancutprob data fraction
rancuttrnval: false # 0(1) to output training(validation) chi2 in report
printpdf4gen: false # To print info on PDFs during minimization

############################################################
lhagrid:
nx: 150
Expand Down
28 changes: 0 additions & 28 deletions n3fit/runcards/Basic_runcard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ fitting:
weight_freq: 100
profiling: False

seed : 9453862133528 # set the seed for the random generator
genrep : True # true = generate MC replicas, false = use real data
rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc

parameters: # This defines the parameter dictionary that is passed to the Model Trainer
nodes_per_layer: [15, 10, 8]
Expand Down Expand Up @@ -89,20 +87,6 @@ fitting:
- { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] }
- { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] }

############################################################
stopping:
stopmethod: LOOKBACK # Stopping method
lbdelta : 0 # Delta for look-back stopping
mingen : 0 # Minimum number of generations
window : 500 # Window for moving average
minchi2 : 3.5 # Minimum chi2
minchi2exp: 6.0 # Minimum chi2 for experiments
nsmear : 200 # Smear for stopping
deltasm : 200 # Delta smear for stopping
rv : 2 # Ratio for validation stopping
rt : 0.5 # Ratio for training stopping
epsilon : 1e-6 # Gradient epsilon

############################################################
positivity:
posdatasets:
Expand All @@ -113,18 +97,6 @@ positivity:
integrability:
integdatasets:
- {dataset: INTEGXT3, poslambda: 1e2}

############################################################
closuretest:
filterseed : 0 # Random seed to be used in filtering data partitions
fakedata : False # true = to use FAKEPDF to generate pseudo-data
fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data
errorsize : 1.0 # uncertainties rescaling
fakenoise : False # true = to add random fluctuations to pseudo-data
rancutprob : 1.0 # Fraction of data to be included in the fit
rancutmethod: 0 # Method to select rancutprob data fraction
rancuttrnval: False # 0(1) to output training(validation) chi2 in report
printpdf4gen: False # To print info on PDFs during minimization

############################################################
lhagrid:
Expand Down
32 changes: 0 additions & 32 deletions n3fit/runcards/DIS_diagonal_l2reg_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,7 @@ fitting:
save: False
load: False

seed : 9453862133528 # set the seed for the random generator
genrep : True # true = generate MC replicas, false = use real data
rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
fitmethod: NGA # Minimization algorithm
nmutants : 80 # Number of mutants for replica
paramtype: NN
nnodes : [2,5,3,1]
diagonal_basis: True

parameters: # This defines the parameter dictionary that is passed to the Model Trainer
Expand Down Expand Up @@ -112,20 +106,6 @@ fitting:
- { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
- { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }

############################################################
stopping:
stopmethod: LOOKBACK # Stopping method
lbdelta : 0 # Delta for look-back stopping
mingen : 0 # Minimum number of generations
window : 500 # Window for moving average
minchi2 : 3.5 # Minimum chi2
minchi2exp: 6.0 # Minimum chi2 for experiments
nsmear : 200 # Smear for stopping
deltasm : 200 # Delta smear for stopping
rv : 2 # Ratio for validation stopping
rt : 0.5 # Ratio for training stopping
epsilon : 1e-6 # Gradient epsilon

############################################################
positivity:
posdatasets:
Expand All @@ -137,18 +117,6 @@ positivity:
- { dataset: POSDYD, poslambda: 1e10 }
- { dataset: POSDYS, poslambda: 1e10 }

############################################################
closuretest:
filterseed : 0 # Random seed to be used in filtering data partitions
fakedata : False # true = to use FAKEPDF to generate pseudo-data
fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data
errorsize : 1.0 # uncertainties rescaling
fakenoise : False # true = to add random fluctuations to pseudo-data
rancutprob : 1.0 # Fraction of data to be included in the fit
rancutmethod: 0 # Method to select rancutprob data fraction
rancuttrnval: False # 0(1) to output training(validation) chi2 in report
printpdf4gen: False # To print info on PDFs during minimization

############################################################
lhagrid:
nx : 150
Expand Down
32 changes: 0 additions & 32 deletions n3fit/runcards/PN3_DIS_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,7 @@ fitting:
save: False
load: False

seed : 9453862133528 # set the seed for the random generator
genrep : True # true = generate MC replicas, false = use real data
rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
fitmethod: NGA # Minimization algorithm
nmutants : 80 # Number of mutants for replica
paramtype: NN
nnodes : [2,5,3,1]

parameters: # This defines the parameter dictionary that is passed to the Model Trainer
nodes_per_layer: [35, 25, 8]
Expand Down Expand Up @@ -109,20 +103,6 @@ fitting:
- { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
- { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }

############################################################
stopping:
stopmethod: LOOKBACK # Stopping method
lbdelta : 0 # Delta for look-back stopping
mingen : 0 # Minimum number of generations
window : 500 # Window for moving average
minchi2 : 3.5 # Minimum chi2
minchi2exp: 6.0 # Minimum chi2 for experiments
nsmear : 200 # Smear for stopping
deltasm : 200 # Delta smear for stopping
rv : 2 # Ratio for validation stopping
rt : 0.5 # Ratio for training stopping
epsilon : 1e-6 # Gradient epsilon

############################################################
positivity:
posdatasets:
Expand All @@ -134,18 +114,6 @@ positivity:
- { dataset: POSDYD, poslambda: 1e10 }
- { dataset: POSDYS, poslambda: 1e10 }

############################################################
closuretest:
filterseed : 0 # Random seed to be used in filtering data partitions
fakedata : False # true = to use FAKEPDF to generate pseudo-data
fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data
errorsize : 1.0 # uncertainties rescaling
fakenoise : False # true = to add random fluctuations to pseudo-data
rancutprob : 1.0 # Fraction of data to be included in the fit
rancutmethod: 0 # Method to select rancutprob data fraction
rancuttrnval: False # 0(1) to output training(validation) chi2 in report
printpdf4gen: False # To print info on PDFs during minimization

############################################################
lhagrid:
nx : 150
Expand Down
35 changes: 1 addition & 34 deletions n3fit/runcards/developing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,7 @@ fitting:
mcseed: 3
load: 'developing_weights.h5'

seed : 9453862133528 # set the seed for the random generator
genrep : true # on = generate MC replicas, off = use real data
rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc
fitmethod: NGA # Minimization algorithm
ngen : 30000 # Maximum number of generations
nmutants : 80 # Number of mutants for replica
paramtype: NN
nnodes : [2,5,3,1]
genrep : true # true = generate MC replicas, false = use real data

parameters: # This defines the parameter dictionary that is passed to the Model Trainer
nodes_per_layer: [50, 35, 25, 8]
Expand Down Expand Up @@ -157,20 +150,6 @@ fitting:
- { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] }
- { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] }

############################################################
stopping:
stopmethod: LOOKBACK # Stopping method
lbdelta : 0 # Delta for look-back stopping
mingen : 0 # Minimum number of generations
window : 500 # Window for moving average
minchi2 : 3.5 # Minimum chi2
minchi2exp: 6.0 # Minimum chi2 for experiments
nsmear : 200 # Smear for stopping
deltasm : 200 # Delta smear for stopping
rv : 2 # Ratio for validation stopping
rt : 0.5 # Ratio for training stopping
epsilon : 1e-6 # Gradient epsilon

############################################################
positivity:
posdatasets:
Expand All @@ -186,18 +165,6 @@ integrability:
- { dataset: INTEGXT8, poslambda: 1e2 }
- { dataset: INTEGXT3, poslambda: 1e2 }

############################################################
closuretest:
filterseed : 0 # Random seed to be used in filtering data partitions
fakedata : False # on = to use FAKEPDF to generate pseudo-data
fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data
errorsize : 1.0 # uncertainties rescaling
fakenoise : False # on = to add random fluctuations to pseudo-data
rancutprob : 1.0 # Fraction of data to be included in the fit
rancutmethod: 0 # Method to select rancutprob data fraction
rancuttrnval: False # 0(1) to output training(validation) chi2 in report
printpdf4gen: False # To print info on PDFs during minimization

############################################################
lhagrid:
nx : 150
Expand Down
10 changes: 9 additions & 1 deletion n3fit/src/n3fit/scripts/n3fit_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
N3FIT_FIXED_CONFIG = dict(
use_cuts = 'internal',
use_t0 = True,
actions_ = ['datacuts::theory::closuretest performfit']
actions_ = []
)

N3FIT_PROVIDERS = ["n3fit.performfit", "validphys.results"]
Expand Down Expand Up @@ -109,6 +109,14 @@ def from_yaml(cls, o, *args, **kwargs):
raise ConfigError(f"Failed to parse yaml file: {e}")
if not isinstance(file_content, dict):
raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.")

if file_content.get('closuretest') is not None:
N3FIT_FIXED_CONFIG['actions_'].append(
'datacuts::theory::closuretest performfit')
else:
N3FIT_FIXED_CONFIG['actions_'].append(
'datacuts::theory performfit')

file_content.update(N3FIT_FIXED_CONFIG)
return cls(file_content, *args, **kwargs)

Expand Down
21 changes: 17 additions & 4 deletions validphys2/src/validphys/closuretest/closure_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,22 @@ def check_use_fitcommondata(use_fitcommondata):
@make_argcheck
def check_fit_isclosure(fit):
"""Check the input fit is a closure test"""
if not fit.as_input()["closuretest"]["fakedata"]:
raise CheckError(f"Specified fit: {fit}, is not a closure test")
fitinfo = fit.as_input()
if not "closuretest" in fitinfo:
raise CheckError(
f"There is no `closuretest` namespace in {fit}'s runcard. "
f"{fit} is therefore not suitable for closure-test studies."
)
if not "fakedata" in fitinfo["closuretest"]:
raise CheckError(
f"The `fakedata` key does not exist in the `closuretest` namespace of {fit}'s runcard. "
f"{fit} is therefore not suitable for closure-test studies."
)
if not fitinfo["closuretest"]["fakedata"]:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, maybe it is not related specifically to this PR but what's the point of having a key that is mandatory and that is only allowed to be set to true?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it is essentially checking that somebody hasn't supplied a normal fit and then tried to calculate a closure estimator. Otherwise it could fail silently, since - up until this PR - all fits have had a fakepdf which does absolutely nothing.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the name of the check suggests

raise CheckError(
f"The `fakedata` key is not set to `true` in the `closuretest` namespace of {fit}'s runcard. "
f"{fit} is therefore not suitable for closure-test studies."
)


@make_argcheck
Expand Down Expand Up @@ -52,8 +66,7 @@ def check_fits_same_filterseed(fits):
def check_fits_areclosures(fits):
"""Check all fits are closures"""
for fit in fits:
if not fit.as_input()["closuretest"]["fakedata"]:
raise CheckError(f"Specified fit: {fit}, is not a closure test")
check_fit_isclosure.__wrapped__(fit)


@make_argcheck
Expand Down
3 changes: 3 additions & 0 deletions validphys2/src/validphys/closuretest/closure_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def biases_table(


@check_pdf_is_montecarlo
@check_fit_isclosure
def bootstrap_bias_experiment(
dataset_inputs_results, underlying_dataset_inputs_results, bootstrap_samples=500
):
Expand Down Expand Up @@ -202,6 +203,7 @@ def variance_experiment(dataset_inputs_results, fit, use_fitcommondata):
return variance_dataset(dataset_inputs_results, fit, use_fitcommondata)


@check_fit_isclosure
def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500):
"""Calculate the variance as in `variance_experiment` but performs bootstrap
sample of the estimator. Returns an array of variance for each resample,
Expand Down Expand Up @@ -371,6 +373,7 @@ def delta_chi2_table(
return res


@check_fit_isclosure
def fit_underlying_pdfs_summary(fit, fitunderlyinglaw):
"""Returns a table with a single column for the `fit` with a row indication
the PDF used to generate the data and the t0 pdf
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ def produce_scale_variation_theories(self, theoryid, point_prescription):


@configparser.explicit_node
def produce_filter_data(self, fakedata: bool, theorycovmatconfig=None):
def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None):
"""Set the action used to filter the data to filter either real or
closure data. If the closure data filter is being used and if the
theory covariance matrix is not being closure tested then filter
Expand Down
7 changes: 4 additions & 3 deletions validphys2/src/validphys/eff_exponents.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,9 +471,10 @@ def iterated_runcard_yaml(
fitting_data[seed] = random.randrange(0, 2**32)

# Next "closuretest" section of runcard
closuretest_data = filtermap["closuretest"]
if "filterseed" in closuretest_data:
closuretest_data["filterseed"] = random.randrange(0, 2**32)
if "closuretest" in filtermap:
closuretest_data = filtermap["closuretest"]
if "filterseed" in closuretest_data:
closuretest_data["filterseed"] = random.randrange(0, 2**32)

# update description if necessary
if _updated_description is not None:
Expand Down
9 changes: 8 additions & 1 deletion validphys2/src/validphys/scripts/vp_setupfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
actions_=[
'datacuts check_t0pdfset',
'theory check_positivity',
'datacuts::closuretest::theory::fitting filter',
])

SETUPFIT_PROVIDERS = ['validphys.filters',
Expand Down Expand Up @@ -145,6 +144,14 @@ def from_yaml(cls, o, *args, **kwargs):
if not isinstance(file_content, dict):
raise ConfigError(f"Expecting input runcard to be a mapping, "
f"not '{type(file_content)}'.")

if file_content.get('closuretest') is not None:
SETUPFIT_FIXED_CONFIG['actions_'].append(
'datacuts::closuretest::theory::fitting filter')
else:
SETUPFIT_FIXED_CONFIG['actions_'].append(
'datacuts::theory::fitting filter')

if file_content.get('theorycovmatconfig') is not None:
SETUPFIT_FIXED_CONFIG['actions_'].append(
'datacuts::theory::theorycovmatconfig nnfit_theory_covmat')
Expand Down