Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
data_central:
- 2.78800000e+00
- 2.7472
- 2.7268
- 2.7472
- 2.69960000e+00
- 2.686
- 2.61120000e+00
- 2.36640000e+00
- 1.9856
- 1.5572
- 1.04720000e+00
- 4.97080000e-01
- 1.04
- 1.04
- 1.03
- 1.05
- 1.02
- 0.968
- 9.12000000e-01
- 0.779
- 0.664
- 0.483
- 0.335
- 0.163
- 0.484
- 4.78000000e-01
- 0.486
- 0.496
- 4.58000000e-01
- 0.432
- 0.376
- 0.332
- 0.268
- 0.2
- 0.1208
- 0.0486
- 1.42000000e-01
- 0.1344
- 1.26400000e-01
- 0.098
- 0.0524
- 1.52400000e-02
- 0.0294
- 0.0276
- 2.28000000e-02
- 0.0161
- 0.00442
- 5.74000000e-04
96 changes: 96 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
filter.py module for ATLAS_Z0_8TEV_HIMASS dataset
When running `python filter.py` the relevant uncertainties, data and kinematics yaml
files will be created in the `nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS` directory.
"""

import yaml
from filter_utils import get_kinematics, get_data_values, get_systematics
from nnpdf_data.filter_utils.utils import prettify_float

# Register prettify_float (imported from nnpdf_data.filter_utils.utils) as the
# representer PyYAML uses whenever it dumps a Python float.
yaml.add_representer(float, prettify_float)


def filter_ATLAS_Z0_8TEV_HIMASS_data_kinetic():
    """
    Dump the central values to `data.yaml` and the kinematic bins to
    `kinematics.yaml`.

    Both files are opened with relative paths, i.e. they are created in the
    current working directory.
    """
    # Gather everything before touching the disk, kinematics first.
    bins = get_kinematics()
    central = list(get_data_values())

    # (output path, document) pairs, written in this order.
    outputs = (
        ("data.yaml", {"data_central": central}),
        ("kinematics.yaml", {"bins": bins}),
    )

    for path, document in outputs:
        with open(path, "w") as stream:
            # sort_keys=False keeps the insertion order of the dictionaries
            yaml.dump(document, stream, sort_keys=False)


def filter_ATLAS_Z0_8TEV_HIMASS_systematics():
    """
    Write the breakdown of uncertainties to `uncertainties.yaml`.

    The number of data points is read from `metadata.yaml`
    (`implemented_observables[0]['ndata']`) and the uncertainty sources are
    taken from `get_systematics()`. The output document has two sections:

    - `definitions`: one entry per source with its description, treatment
      (always "MULT") and correlation type;
    - `bins`: one dictionary per data point mapping each source to its value.

    Every source is stored under the same key in both sections. In particular
    the `sys,lumi` source is written as `ATLASLUMI12` in `definitions` *and*
    in `bins` (previously the two sections used different keys for it).
    """
    with open("metadata.yaml", "r") as file:
        metadata = yaml.safe_load(file)
    ndata = metadata['implemented_observables'][0]['ndata']

    error_definitions = {}
    # (output key, per-point values) for every source, in input order
    columns = []

    for entry in get_systematics():
        # each entry is a one-element list wrapping {"name": ..., "values": ...}
        source = entry[0]
        name = source['name']

        if name == 'stat':
            # Stored under a different name: according to the original
            # author's note, an uncertainty called `stat` is required to
            # have treatment == ADD, while here it is multiplicative.
            key, unc_type = 'stat_mult', 'UNCORR'
        elif name == 'sys,unc':
            key, unc_type = name, 'UNCORR'
        elif name == 'sys,lumi':
            # The type is the same label as the key: "ATLASLUMI12".
            key, unc_type = 'ATLASLUMI12', 'ATLASLUMI12'
        else:
            key, unc_type = name, 'CORR'

        error_definitions[key] = {
            "description": key,
            "treatment": "MULT",
            "type": unc_type,
        }
        columns.append((key, source['values']))

    # one dictionary per data point, keyed exactly like `definitions`
    errors = [
        {key: float(values[i]) for key, values in columns}
        for i in range(ndata)
    ]

    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}

    # write uncertainties
    with open("uncertainties.yaml", 'w') as file:
        yaml.dump(uncertainties_yaml, file, sort_keys=False)


if __name__ == "__main__":
    # Run the writers in order: central values and kinematics first,
    # then the uncertainties.
    for write_step in (
        filter_ATLAS_Z0_8TEV_HIMASS_data_kinetic,
        filter_ATLAS_Z0_8TEV_HIMASS_systematics,
    ):
        write_step()
101 changes: 101 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
This module contains helper functions that are used to extract the uncertainties, kinematics and data values
from the rawdata files.
"""

import yaml


def get_kinematics():
    """
    Build the kinematic bins from the HEPData table.

    Returns a list with one dictionary per entry of the table. Each dictionary
    has the keys `abs_y` (from the second independent variable), `m_ll2`
    (the squared edges and squared midpoint of the first independent variable)
    and `sqrts` (only `mid` is set, to 8000.0), each holding `min`, `mid`
    and `max`.
    """
    table_path = "rawdata/HEPData-ins1467454-v1-Table_2.yaml"

    with open(table_path, 'r') as stream:
        table = yaml.safe_load(stream)

    rapidity_bins = table["independent_variables"][1]['values']
    mass_bins = table["independent_variables"][0]['values']

    return [
        {
            'abs_y': {
                'min': y_bin['low'],
                'mid': 0.5 * (y_bin['low'] + y_bin['high']),
                'max': y_bin['high'],
            },
            'm_ll2': {
                'min': m_bin['low'] ** 2,
                # midpoint of the bin first, then squared
                'mid': (0.5 * (m_bin['low'] + m_bin['high'])) ** 2,
                'max': m_bin['high'] ** 2,
            },
            'sqrts': {'min': None, 'mid': 8000.0, 'max': None},
        }
        for y_bin, m_bin in zip(rapidity_bins, mass_bins)
    ]


def get_data_values():
    """
    Read the central values from the HEPData table.

    Returns a list with one number per entry of the first dependent variable:
    the tabulated value multiplied by 2 and by the width (`high - low`) of the
    matching bin of the first independent variable.
    """
    table_path = "rawdata/HEPData-ins1467454-v1-Table_2.yaml"

    with open(table_path, 'r') as stream:
        table = yaml.safe_load(stream)

    points = table['dependent_variables'][0]['values']
    mass_bins = table["independent_variables"][0]['values']

    # rescaling kept from the original implementation, whose comment states
    # that it normalizes the data to match the applgrid predictions
    return [
        point['value'] * 2 * (m_bin['high'] - m_bin['low'])
        for point, m_bin in zip(points, mass_bins)
    ]


def get_systematics():
""" """

uncertainties = []

hepdata_table = f"rawdata/HEPData-ins1467454-v1-Table_2.yaml"

with open(hepdata_table, 'r') as file:
input = yaml.safe_load(file)

# loop over systematics
for unc_labels in input['dependent_variables'][0]['values'][0]['errors']:

name = f"{unc_labels['label']}"
values = []

# loop over data points
for unc, mass_bins in zip(
input['dependent_variables'][0]['values'], input["independent_variables"][0]['values']
):
err = unc['errors']
# normalize the central values
cv = unc['value'] * 2 * (mass_bins['high'] - mass_bins['low'])

# convert unc from TeV to GeV
for e in err:
if e['label'] == name:

if 'asymerror' in e:
# the errors are actually symmetric.
values.append(float(e['asymerror']['plus'][:-1]) * cv / 100.0)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @comane, I landed here after I wondered how the experimental covmat gets constructed in validphys. When the experimentalists don't provide a covmat, we use the breakdown of systematics to construct it as follows
image

In general, the off-diagonal components of a covariance matrix can have either sign, so each $s_{i,\mathrm{corr}}$ should be allowed to be either positive or negative. But on HEPData, the values we use for $s_{i,\mathrm{corr}}$ are typically all positive! The experimentalists only give the size of $s_{i,\mathrm{corr}}$, so any information on the sign is lost. Are we sure that in that case the covmat can be constructed this way?

In the line selected here, I noticed that the error you extract from HEPData is negative. Is this done on purpose?


else:
values.append(float(e['symerror'][:-1]) * cv / 100.0)

uncertainties.append([{"name": name, "values": values}])

return uncertainties
Loading