NNPDF · scarlehoff · Jan 21, 2025 · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/data.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/data.yaml
@@ -0,0 +1,49 @@
+data_central:
+- 2.78800000e+00
+- 2.7472
+- 2.7268
+- 2.7472
+- 2.69960000e+00
+- 2.686
+- 2.61120000e+00
+- 2.36640000e+00
+- 1.9856
+- 1.5572
+- 1.04720000e+00
+- 4.97080000e-01
+- 1.04
+- 1.04
+- 1.03
+- 1.05
+- 1.02
+- 0.968
+- 9.12000000e-01
+- 0.779
+- 0.664
+- 0.483
+- 0.335
+- 0.163
+- 0.484
+- 4.78000000e-01
+- 0.486
+- 0.496
+- 4.58000000e-01
+- 0.432
+- 0.376
+- 0.332
+- 0.268
+- 0.2
+- 0.1208
+- 0.0486
+- 1.42000000e-01
+- 0.1344
+- 1.26400000e-01
+- 0.098
+- 0.0524
+- 1.52400000e-02
+- 0.0294
+- 0.0276
+- 2.28000000e-02
+- 0.0161
+- 0.00442
+- 5.74000000e-04
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter.py
@@ -0,0 +1,96 @@
+"""
+filter.py module for ATLAS_Z0_8TEV_HIMASS dataset
+When running `python filter.py` the relevant uncertainties, data and kinematics yaml
+files will be created in the `nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS` directory.
+"""
+
+import yaml
+from filter_utils import get_kinematics, get_data_values, get_systematics
+from nnpdf_data.filter_utils.utils import prettify_float
+
+yaml.add_representer(float, prettify_float)  # floats passed to yaml.dump are rendered via prettify_float
+
+
+def filter_ATLAS_Z0_8TEV_HIMASS_data_kinetic():
+    """
+    Dump the central values to `data.yaml` and the kinematic bins to `kinematics.yaml`.
+    """
+
+    kin = get_kinematics()
+    central_values = list(get_data_values())
+
+    data_central_yaml = {"data_central": central_values}
+
+    kinematics_yaml = {"bins": kin}
+
+    # both files are opened with relative paths, i.e. created in the current working directory
+    with open("data.yaml", "w") as file:
+        yaml.dump(data_central_yaml, file, sort_keys=False)
+
+    with open("kinematics.yaml", "w") as file:
+        yaml.dump(kinematics_yaml, file, sort_keys=False)
+
+
+def filter_ATLAS_Z0_8TEV_HIMASS_systematics():
+    """
+    Write the uncertainty definitions and the per-point values to `uncertainties.yaml`.
+    """
+
+    with open("metadata.yaml", "r") as file:
+        metadata = yaml.safe_load(file)
+
+    systematics = get_systematics()
+
+    # error_definitions: name -> description/treatment/type; errors: one {name: value} dict per point
+    error_definitions = {}
+    errors = []
+
+    for sys in systematics:
+        if sys[0]['name'] == 'stat':
+            sys[0]['name'] = 'stat_mult'
+            error_definitions[sys[0]['name']] = {
+                "description": sys[0]['name'],  # renamed above; presumably a plain 'stat' must have treatment ADD (confirm)
+                "treatment": "MULT",
+                "type": "UNCORR",
+            }
+
+        elif sys[0]['name'] == 'sys,unc':
+            error_definitions[sys[0]['name']] = {
+                "description": f"{sys[0]['name']}",
+                "treatment": "MULT",
+                "type": "UNCORR",
+            }
+        # NOTE(review): the key below is "ATLASLUMI12" while the per-point dicts keep 'sys,lumi'
+        elif sys[0]['name'] == 'sys,lumi':
+            error_definitions["ATLASLUMI12"] = {
+                "description": f"ATLASLUMI12",
+                "treatment": "MULT",
+                "type": "ATLASLUMI12",
+            }
+
+        else:
+            error_definitions[sys[0]['name']] = {
+                "description": f"{sys[0]['name']}",
+                "treatment": "MULT",
+                "type": "CORR",
+            }
+
+    # build one {name: value} dict per data point; ndata is taken from the first observable
+    for i in range(metadata['implemented_observables'][0]['ndata']):
+        error_value = {}
+
+        for sys in systematics:
+            error_value[sys[0]['name']] = float(sys[0]['values'][i])
+
+        errors.append(error_value)
+
+    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}
+
+    # dump definitions and per-point values in insertion order (sort_keys=False)
+    with open(f"uncertainties.yaml", 'w') as file:
+        yaml.dump(uncertainties_yaml, file, sort_keys=False)
+
+
+if __name__ == "__main__":
+    filter_ATLAS_Z0_8TEV_HIMASS_data_kinetic()  # writes data.yaml and kinematics.yaml
+    filter_ATLAS_Z0_8TEV_HIMASS_systematics()  # reads metadata.yaml, writes uncertainties.yaml
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_8TEV_HIMASS/filter_utils.py
@@ -0,0 +1,101 @@
+"""
+This module contains helper functions that are used to extract the uncertainties, kinematics and data values
+from the rawdata files.
+"""
+
+import yaml
+
+
+def get_kinematics():
+    """
+    Return one dict per data point with 'min'/'mid'/'max' entries for abs_y, m_ll2 and sqrts.
+    """
+    kin = []
+
+    hepdata_table = f"rawdata/HEPData-ins1467454-v1-Table_2.yaml"
+
+    with open(hepdata_table, 'r') as file:
+        input = yaml.safe_load(file)
+
+    for indep_var1, indep_var2 in zip(
+        input["independent_variables"][1]['values'], input["independent_variables"][0]['values']
+    ):
+        # indep_var1 (independent variable 1) feeds abs_y, indep_var2 (independent variable 0) feeds m_ll2
+        kin_value = {
+            'abs_y': {
+                'min': indep_var1['low'],
+                'mid': 0.5 * (indep_var1['low'] + indep_var1['high']),
+                'max': indep_var1['high'],
+            },
+            'm_ll2': {
+                'min': indep_var2['low']**2,
+                'mid': (0.5 * (indep_var2['low'] + indep_var2['high']))**2,  # squared bin centre, not centre of squared edges
+                'max': indep_var2['high']**2,
+            },
+            'sqrts': {'min': None, 'mid': 8000.0, 'max': None},  # same fixed value for every point
+        }
+
+        kin.append(kin_value)
+
+    return kin
+
+
+def get_data_values():
+    """
+    Return the central values, each scaled by twice the width of its mass bin, as a list.
+    """
+
+    data_central = []
+
+    hepdata_table = f"rawdata/HEPData-ins1467454-v1-Table_2.yaml"
+
+    with open(hepdata_table, 'r') as file:
+        input = yaml.safe_load(file)
+
+    values = input['dependent_variables'][0]['values']
+
+    for value, mass_bins in zip(values, input["independent_variables"][0]['values']):
+        # multiply by 2 * (mass bin width); per the original note this matches the applgrid predictions
+        data_central.append(value['value'] * 2 * (mass_bins['high'] - mass_bins['low']))
+
+    return data_central
+
+
+def get_systematics():
+    """Return a list with one `[{"name": ..., "values": [...]}]` entry per uncertainty label."""
+
+    uncertainties = []
+
+    hepdata_table = f"rawdata/HEPData-ins1467454-v1-Table_2.yaml"
+
+    with open(hepdata_table, 'r') as file:
+        input = yaml.safe_load(file)
+
+    # loop over systematics; the labels are read from the first data point only
+    for unc_labels in input['dependent_variables'][0]['values'][0]['errors']:
+
+        name = f"{unc_labels['label']}"
+        values = []
+        # a point without an error carrying this label adds nothing to 'values'
+        # loop over data points
+        for unc, mass_bins in zip(
+            input['dependent_variables'][0]['values'], input["independent_variables"][0]['values']
+        ):
+            err = unc['errors']
+            # central value rescaled by 2 * (mass bin width)
+            cv = unc['value'] * 2 * (mass_bins['high'] - mass_bins['low'])
+
+            # error strings lose their last character (presumably '%') and are scaled by cv / 100
+            for e in err:
+                if e['label'] == name:
+
+                    if 'asymerror' in e:
+                        # only 'plus' is read; per the original note the errors are actually symmetric
+                        values.append(float(e['asymerror']['plus'][:-1]) * cv / 100.0)
+
+                    else:
+                        values.append(float(e['symerror'][:-1]) * cv / 100.0)
+
+        uncertainties.append([{"name": name, "values": values}])
+
+    return uncertainties