NNPDF · scarlehoff · Dec 6, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/data.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/data.yaml
@@ -0,0 +1,23 @@
+data_central:
+- 577150.0
+- 576870.0
+- 581750.0
+- 586070.0
+- 586330.0
+- 599070.0
+- 596750.0
+- 604170.0
+- 606930.0
+- 593400.0
+- 558460.0
+- 436450.0
+- 432780.0
+- 429290.0
+- 423380.0
+- 413640.0
+- 405260.0
+- 388020.0
+- 377510.0
+- 365820.0
+- 344700.0
+- 319040.0
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter.py
@@ -0,0 +1,93 @@
+"""
+Running `python filter.py` from the `nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB` directory
+creates `data.yaml`, `kinematics.yaml` and `uncertainties.yaml` in that directory.
+"""
+
+import yaml
+from filter_utils import get_data_values, get_kinematics, get_systematics
+
+from nnpdf_data.filter_utils.utils import prettify_float
+
+yaml.add_representer(float, prettify_float)  # float formatting for the yaml.dump calls below
+
+
+def filter_ATLAS_WPWM_7TEV_46FB_data_kinematic():
+    """
+    This function writes the central values to yaml files.
+    """
+    central_values = list(get_data_values())
+
+    kin = get_kinematics()
+
+    data_central_yaml = {"data_central": central_values}
+
+    kinematics_yaml = {"bins": kin}
+
+    # write central values and kinematics to yaml file
+    with open("data.yaml", "w") as file:
+        yaml.dump(data_central_yaml, file, sort_keys=False)
+
+    with open("kinematics.yaml", "w") as file:
+        yaml.dump(kinematics_yaml, file, sort_keys=False)
+
+
+def filter_ATLAS_WPWM_7TEV_46FB_systematics():
+    """
+    This function writes the systematics to a yaml file.
+    """
+
+    with open("metadata.yaml", "r") as file:
+        metadata = yaml.safe_load(file)
+
+    systematics = get_systematics()
+
+    # error definition
+    error_definitions = {}
+    errors = []
+
+    counter = 1
+
+    for sys in systematics:
+        if sys[0]['name'] == 'stat':
+            error_definitions[sys[0]['name']] = {
+                "description": "Uncorrelated statistical uncertainties",
+                "treatment": "ADD",
+                "type": "UNCORR",
+            }
+        elif 'UNCORR' in sys[0]['name']:
+            error_definitions['sys_corr_' + str(counter)] = {
+                "description": "Sys uncertainty idx: " + str(counter),
+                "treatment": "MULT",
+                "type": "UNCORR",
+            }
+            counter += 1
+        else:
+            error_definitions['sys_corr_' + str(counter)] = {
+                "description": "Sys uncertainty idx: " + str(counter),
+                "treatment": "MULT",
+                "type": f"{sys[0]['name']}",
+            }
+            counter += 1
+
+    for i in range(metadata['implemented_observables'][0]['ndata']):
+        error_value = {}
+        counter_2 = 0
+        for sys in systematics:
+            if counter_2 == 0:
+                error_value[sys[0]['name']] = float(sys[0]['values'][i])
+            else:
+                error_value['sys_corr_' + str(counter_2)] = float(sys[0]['values'][i])
+            counter_2 += 1
+
+        errors.append(error_value)
+
+    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}
+
+    # write uncertainties
+    with open(f"uncertainties.yaml", 'w') as file:
+        yaml.dump(uncertainties_yaml, file, sort_keys=False)
+
+
+if __name__ == "__main__":  # only when the module is executed as a script
+    filter_ATLAS_WPWM_7TEV_46FB_data_kinematic()  # writes data.yaml and kinematics.yaml
+    filter_ATLAS_WPWM_7TEV_46FB_systematics()  # writes uncertainties.yaml
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB/filter_utils.py
@@ -0,0 +1,116 @@
+"""
+This module contains helper functions that extract the data values, the
+kinematics and the systematic uncertainties from the rawdata files.
+"""
+
+import yaml
+import pandas as pd
+import numpy as np
+
+
+def get_data_values():
+    """
+    returns the central data values in the form of a list.
+    """
+
+    data_central = []
+
+    hepdata_table_1 = f"rawdata/HEPData-ins1502620-v1-Table_9.yaml"
+    hepdata_table_2 = f"rawdata/HEPData-ins1502620-v1-Table_10.yaml"
+
+    with open(hepdata_table_1, 'r') as file:
+        input_1 = yaml.safe_load(file)
+
+    with open(hepdata_table_2, 'r') as file:
+        input_2 = yaml.safe_load(file)
+
+    values_1 = input_1['dependent_variables'][0]['values']
+    values_2 = input_2['dependent_variables'][0]['values']
+
+    values = values_1 + values_2
+
+    for value in values:
+        # store data central and convert the units
+        data_central.append(value['value'] * 1000)
+
+    return data_central
+
+
+def get_kinematics():
+    """
+    returns the kinematics in the form of a list of dictionaries.
+    """
+    kin = []
+
+    hepdata_table_1 = f"rawdata/HEPData-ins1502620-v1-Table_9.yaml"
+    hepdata_table_2 = f"rawdata/HEPData-ins1502620-v1-Table_10.yaml"
+
+    with open(hepdata_table_1, 'r') as file:
+        input_1 = yaml.safe_load(file)
+
+    with open(hepdata_table_2, 'r') as file:
+        input_2 = yaml.safe_load(file)
+
+    for i, M in enumerate(input_1["independent_variables"][0]['values']):
+
+        kin_value = {
+            'abs_eta': {
+                'min': None,
+                'mid': (0.5 * (M['low'] + M['high'])),
+                'max': None,
+            },  # absolute lepton eta
+            'm_W2': {'min': None, 'mid': 6463.838404, 'max': None},
+            'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
+        }
+
+        kin.append(kin_value)
+
+    for i, M in enumerate(input_2["independent_variables"][0]['values']):
+
+        kin_value = {
+            'abs_eta': {
+                'min': None,
+                'mid': (0.5 * (M['low'] + M['high'])),
+                'max': None,
+            },  # absolute lepton eta
+            'm_W2': {'min': None, 'mid': 6463.838404, 'max': None},
+            'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
+        }
+
+        kin.append(kin_value)
+
+    return kin
+
+
+def get_systematics_dataframe():
+    """
+    returns the absolute systematic uncertainties in the form of a pandas dataframe.
+    """
+    sys_rawdata_path = "rawdata/wzrap11_full.csv"
+
+    df = pd.read_csv(sys_rawdata_path)
+    data_central = np.array(get_data_values())
+
+    # convert (MULT) percentage unc to absolute unc
+    abs_unc_df = (df.T[2:] * data_central).T / 100
+
+    return abs_unc_df
+
+
+def get_systematics():
+    """ """
+    abs_unc_df = get_systematics_dataframe()
+
+    uncertainties = []
+
+    for i, unc_dp in enumerate(abs_unc_df.values.T):
+        name = f"{abs_unc_df.columns[i]}"
+        values = [unc_dp[j] for j in range(len(unc_dp))]
+        uncertainties.append([{"name": name, "values": values}])
+
+    return uncertainties
+
+
+if __name__ == "__main__":  # manual smoke run; both return values are discarded
+    get_data_values()
+    get_systematics_dataframe()