Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
data_central:
- 577150.0
- 576870.0
- 581750.0
- 586070.0
- 586330.0
- 599070.0
- 596750.0
- 604170.0
- 606930.0
- 593400.0
- 558460.0
- 436450.0
- 432780.0
- 429290.0
- 423380.0
- 413640.0
- 405260.0
- 388020.0
- 377510.0
- 365820.0
- 344700.0
- 319040.0
93 changes: 93 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
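Run `python filter.py` from the `nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB`
directory: the script reads `metadata.yaml` and the `rawdata` folder relative to
the working directory and writes `data.yaml`, `kinematics.yaml` and
`uncertainties.yaml` there.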
"""

import yaml
from filter_utils import get_data_values, get_kinematics, get_systematics

from nnpdf_data.filter_utils.utils import prettify_float

# Route every Python float through `prettify_float` when dumping with the
# default PyYAML dumper (used by the `yaml.dump` calls in this module).
yaml.add_representer(float, prettify_float)

Comment thread
scarlehoff marked this conversation as resolved.

def filter_ATLAS_WPWM_7TEV_46FB_data_kinematic():
    """
    Dump the central values and the kinematic bins to yaml.

    `data.yaml` receives the output of `get_data_values()` under the key
    `data_central`; `kinematics.yaml` receives the output of
    `get_kinematics()` under the key `bins`. Both files are written to the
    current working directory, in that order.
    """
    # Collect both payloads before opening any output file.
    outputs = (
        ("data.yaml", {"data_central": list(get_data_values())}),
        ("kinematics.yaml", {"bins": get_kinematics()}),
    )

    for filename, payload in outputs:
        with open(filename, "w") as stream:
            # sort_keys=False keeps the insertion order of the dictionaries
            yaml.dump(payload, stream, sort_keys=False)


def filter_ATLAS_WPWM_7TEV_46FB_systematics():
    """
    Write the uncertainty definitions and per-bin values to `uncertainties.yaml`.

    The number of data points is read from the first entry of
    `implemented_observables` in `metadata.yaml`; the uncertainty sources come
    from `get_systematics()`, each one a single-element list holding a dict
    with the keys `name` and `values`.

    Labelling of the sources:
      * the source named `stat` keeps its name and is declared with
        treatment `ADD` and type `UNCORR`;
      * every other source is labelled `sys_corr_<idx>`, with idx counting
        from 1 in input order, and is declared with treatment `MULT`. Its
        type is `UNCORR` when the source name contains `UNCORR`, otherwise
        the source name itself.

    The same label is used for the definition and for the per-bin values, so
    the two sections of the output always agree regardless of where `stat`
    appears among the sources.

    Both yaml files are accessed relative to the current working directory.
    """
    with open("metadata.yaml", "r") as file:
        metadata = yaml.safe_load(file)
    ndata = metadata['implemented_observables'][0]['ndata']

    systematics = get_systematics()

    error_definitions = {}
    # (label, values) pairs: the per-bin dictionaries below reuse exactly the
    # labels chosen here instead of re-deriving them from the source position.
    labelled_sources = []
    counter = 1

    for entry in systematics:
        source = entry[0]
        name = source['name']

        if name == 'stat':
            label = name
            error_definitions[label] = {
                "description": "Uncorrelated statistical uncertainties",
                "treatment": "ADD",
                "type": "UNCORR",
            }
        else:
            label = 'sys_corr_' + str(counter)
            error_definitions[label] = {
                "description": "Sys uncertainty idx: " + str(counter),
                "treatment": "MULT",
                "type": "UNCORR" if 'UNCORR' in name else f"{name}",
            }
            counter += 1

        labelled_sources.append((label, source['values']))

    # One dictionary per data point, mapping each label to a plain float.
    errors = [
        {label: float(values[i]) for label, values in labelled_sources}
        for i in range(ndata)
    ]

    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}

    # write uncertainties
    with open("uncertainties.yaml", 'w') as file:
        yaml.dump(uncertainties_yaml, file, sort_keys=False)


# Script entry point: write data.yaml and kinematics.yaml first, then
# uncertainties.yaml.
if __name__ == "__main__":
    filter_ATLAS_WPWM_7TEV_46FB_data_kinematic()
    filter_ATLAS_WPWM_7TEV_46FB_systematics()
116 changes: 116 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""
This module contains helper functions that are used to extract the data values
from the rawdata files.
"""

import yaml
import pandas as pd
import numpy as np


def get_data_values():
    """
    Return the central data values of both HEPData tables as one list.

    The values of the first dependent variable of Table 9 come first,
    followed by those of Table 10. Each value is multiplied by 1000 to
    convert the units.
    """
    table_paths = (
        "rawdata/HEPData-ins1502620-v1-Table_9.yaml",
        "rawdata/HEPData-ins1502620-v1-Table_10.yaml",
    )

    # Load both tables before extracting anything from them.
    tables = []
    for table_path in table_paths:
        with open(table_path, 'r') as stream:
            tables.append(yaml.safe_load(stream))
    first_table, second_table = tables

    entries = (
        first_table['dependent_variables'][0]['values']
        + second_table['dependent_variables'][0]['values']
    )

    # store data central and convert the units
    return [entry['value'] * 1000 for entry in entries]


def get_kinematics():
    """
    Return the kinematic bins as a list of dictionaries.

    One entry is produced per bin of the first independent variable, Table 9
    bins first and Table 10 bins after them. Each entry holds three
    variables, all with `min` and `max` set to None:
      * `abs_eta`: `mid` is the average of the bin's `low` and `high` edges;
      * `m_W2`: `mid` is fixed to 6463.838404;
      * `sqrts`: `mid` is fixed to 7000.0.
    """
    table_paths = (
        "rawdata/HEPData-ins1502620-v1-Table_9.yaml",
        "rawdata/HEPData-ins1502620-v1-Table_10.yaml",
    )

    # Load both tables up front, then walk through their bins in order.
    tables = []
    for table_path in table_paths:
        with open(table_path, 'r') as stream:
            tables.append(yaml.safe_load(stream))

    bins = []
    for table in tables:
        for eta_bin in table["independent_variables"][0]['values']:
            eta_mid = 0.5 * (eta_bin['low'] + eta_bin['high'])
            # A fresh dictionary is built for every bin so that the yaml dump
            # never sees the same object twice.
            bins.append(
                {
                    # absolute lepton eta
                    'abs_eta': {'min': None, 'mid': eta_mid, 'max': None},
                    'm_W2': {'min': None, 'mid': 6463.838404, 'max': None},
                    'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
                }
            )

    return bins


def get_systematics_dataframe():
    """
    Return the absolute uncertainties as a pandas dataframe.

    The percentage uncertainties are read from `rawdata/wzrap11_full.csv`.
    The first two columns of that file are dropped; every remaining column is
    multiplied row by row with the central values from `get_data_values()`
    and divided by 100.
    """
    percent_table = pd.read_csv("rawdata/wzrap11_full.csv")
    central_values = np.array(get_data_values())

    # Work on the transposed table: its rows are the csv columns, so slicing
    # from index 2 skips the first two columns and the product with the
    # central values lines up one value per data point.
    percent_by_source = percent_table.T[2:]
    scaled_by_source = percent_by_source * central_values

    # convert (MULT) percentage unc to absolute unc
    return scaled_by_source.T / 100


def get_systematics():
    """
    Return the uncertainties of `get_systematics_dataframe()` source by source.

    The result has one element per dataframe column, in column order. Each
    element is a single-item list holding a dictionary with the column label
    (as a string) under `name` and the column entries (as a list) under
    `values`.
    """
    abs_unc_df = get_systematics_dataframe()

    # The transposed value array yields one row per dataframe column.
    return [
        [{"name": f"{column}", "values": list(column_values)}]
        for column, column_values in zip(abs_unc_df.columns, abs_unc_df.values.T)
    ]


# Running this module directly only exercises the loaders: both return values
# are discarded.
if __name__ == "__main__":
    get_data_values()
    get_systematics_dataframe()
Loading