Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/CMS_1JET_7TEV/data_R07.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
data_central:
- 2953.2690000000002
- 1335.78105
- 639.31735
- 311.22885
- 161.26548
- 85.440625
- 46.12412500000001
- 24.90702
- 13.953918499999999
- 7.8356555000000006
- 4.4873655
- 2.62893
- 1.5238954
- 0.87663425
- 0.51637855
- 0.3088446
- 0.1813905
- 0.107358825
- 0.064056495
- 0.037641245000000004
- 0.022154590000000002
- 0.0131474275
- 0.007515096
- 0.0042382505000000004
- 0.0023162765
- 0.001479684
- 0.00080651855
- 0.0003330529
- 2799.6155
- 1229.82535
- 576.0116
- 286.699
- 146.18525
- 76.11559
- 40.519945
- 22.31715
- 12.4270435
- 6.989143
- 3.9380905
- 2.2736349999999996
- 1.3016353999999999
- 0.74941965
- 0.4246726
- 0.24525160000000001
- 0.14281038999999998
- 0.08300146
- 0.047785060000000004
- 0.027622405
- 0.015534824999999999
- 0.0083614125
- 0.004397193
- 0.002400696
- 0.0014339545
- 0.0007708299000000001
- 0.00032298115
- 0.0001540647
- 2351.125
- 1032.87005
- 476.55859999999996
- 230.03785
- 117.05122
- 61.171135
- 31.907865
- 16.5701565
- 9.0991505
- 4.9939
- 2.7480735
- 1.52631545
- 0.83555045
- 0.4604848
- 0.2546981
- 0.13994145
- 0.07495685
- 0.040092575
- 0.021179149999999997
- 0.010701724000000001
- 0.005330658
- 0.0026574930000000004
- 0.0012888560000000001
- 0.0005364438
- 0.0002587899
- 1824.0725
- 798.7361500000001
- 355.31155
- 167.7876
- 83.147695
- 40.312630000000006
- 21.334575
- 10.545282499999999
- 5.5819435
- 2.8101975
- 1.4301964999999999
- 0.74639455
- 0.36604785
- 0.18549759999999998
- 0.086206375
- 0.039591520000000005
- 0.018171475000000003
- 0.008225115
- 0.003620559
- 0.0014617690000000001
- 0.000520213
- 1221.273
- 512.21385
- 222.50975
- 101.20700000000001
- 44.18917
- 20.625805
- 9.451005
- 4.073024
- 1.801326
- 0.7855935000000001
- 0.32102845
- 0.12287545
- 0.04706562
- 0.01631914
- 0.005333325
- 0.001682455
172 changes: 172 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/CMS_1JET_7TEV/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import yaml
import numpy as np
from nnpdf_data.filter_utils.utils import prettify_float

# Register the float representer used when dumping the output files.
# NOTE(review): yaml.add_representer targets PyYAML's default ``Dumper`` unless
# a ``Dumper=`` argument is passed, so ``yaml.safe_dump`` (which uses
# ``SafeDumper``) does not pick this representer up -- confirm that the dump
# calls in this file go through a dumper that actually sees it.
yaml.add_representer(float, prettify_float)

'''
This is an extended version of the filter_no_intersection. This implementation takes
the intersection of the 1406.0324 and 1212.6660 measurements. This is to ensure that the
binning in the grids available for 1212.6660 matches the binning of 1406.0324.
'''

def read_metadata():
    '''
    Load ``metadata.yaml`` (path relative to the current working directory)
    and return the ``tables`` entry of the first implemented observable,
    i.e. the list of tables used in the implementation.
    '''
    with open('metadata.yaml', 'r') as metadata_file:
        metadata = yaml.safe_load(metadata_file)
    first_observable = metadata['implemented_observables'][0]
    return first_observable['tables']

def read_table(table_no: int):
    '''
    Read ``rawdata/Table{table_no}.yaml`` and return three lists:
        kinematic bins (one dict with 'y', 'pT' and 'sqrts' per pT bin)
        central values corrected as per eq 28
        uncertainties corrected as per eq 29
    in paper https://arxiv.org/pdf/physics/0403086

    The central values are returned as numpy floats (they come out of a
    numpy array sum); each uncertainty is a dict ``{'stat': ..., 'sys': ...}``.

    Raises
    ------
    ValueError
        if a bin of the cross-section does not carry exactly one 'stat' and
        exactly one 'sys' error. Collecting the two kinds of error in
        independent lists would otherwise silently pair the uncertainties
        of one bin with the central value of another.
    '''
    with open(f'rawdata/Table{table_no}.yaml', 'r') as file:
        table = yaml.safe_load(file)

    # sort out kinematic bins:
    qualifiers = table['dependent_variables'][0]['qualifiers']
    sqrts_val = float(qualifiers[1]['value'])
    ybin = qualifiers[0]['value']
    # the rapidity range is parsed by fixed character positions:
    # characters 0-2 give the lower edge and characters 4-6 the upper edge
    ymin, ymax = float(ybin[:3]), float(ybin[4:7])
    ymid = (ymin+ymax)/2
    # key order (y, pT, sqrts) is kept because the dump does not sort keys
    bins_in_table = [
        {
            'y': {'min': ymin, 'mid': ymid, 'max': ymax},
            'pT': {'min': ptbin['low'], 'mid': (ptbin['low']+ptbin['high'])/2, 'max': ptbin['high']},
            'sqrts': {'min': None, 'mid': sqrts_val, 'max': None},
        }
        for ptbin in table['independent_variables'][0]['values']
    ]

    # read the central values and process the uncertainties bin by bin
    central_values = []
    shifts_in_central = []
    uncertainties_in_table = []
    for dep_var in table['dependent_variables']:
        if dep_var['header']['name'] != 'D2(SIG)/DPT/DABS(YRAP)':
            continue
        for bin_val in dep_var['values']:
            stat_errs = [err['symerror'] for err in bin_val['errors'] if err['label'] == 'stat']
            sys_errs = [err['asymerror'] for err in bin_val['errors'] if err['label'] == 'sys']
            if len(stat_errs) != 1 or len(sys_errs) != 1:
                raise ValueError(
                    f'Table{table_no}: every bin must carry exactly one stat and one sys error, '
                    f'found {len(stat_errs)} stat and {len(sys_errs)} sys'
                )

            # only the minus variation goes through abs(); the plus one is used as given
            del_minus = np.abs(sys_errs[0]['minus'])
            del_plus = sys_errs[0]['plus']
            # calculating delta and Deltabar as per eqs 23, 24
            delta = (del_plus-del_minus)/2
            Deltabar = float((del_plus+del_minus)/2)
            # each delta/Deltabar is of order 0.02, so using eq 28, 29 is justified
            central_values.append(bin_val['value'])
            shifts_in_central.append(delta)
            uncertainties_in_table.append({'stat': stat_errs[0], 'sys': Deltabar})

    corrected_centrals_in_table = list(np.array(central_values)+np.array(shifts_in_central))

    return bins_in_table, corrected_centrals_in_table, uncertainties_in_table

'''
above function returns three lists: bins, central values, and uncertainties for a given table.
we can now take the bins for corresponding tables in 1212.6660 and then take intersections.
'''

def read_old_bins(table_no: int):
    '''
    Read ``rawdata_1212p6660/Table{table_no}.yaml`` and return its pT bins
    as a list of ``{'min': ..., 'mid': ..., 'max': ...}`` dictionaries,
    built from the 'low' and 'high' edges of the first independent variable.
    '''
    with open(f'rawdata_1212p6660/Table{table_no}.yaml', 'r') as old_table:
        pt_ranges = yaml.safe_load(old_table)['independent_variables'][0]['values']
    return [
        {
            'min': edges['low'],
            'mid': (edges['low']+edges['high'])/2,
            'max': edges['high'],
        }
        for edges in pt_ranges
    ]

def build_intersection(table_no: int, show_bins_to_delete=False):
    '''
    For the table ``table_no`` of the raw data, keep only the entries whose
    pT bin also appears in the corresponding table of the old data, and
    return the surviving bins, central values and uncertainties (three lists).

    With ``show_bins_to_delete=True`` the old pT bins that are absent from
    the intersection are printed as well.
    '''
    # new tables 7..11 correspond to old tables 1..5
    old_table_of = {str(new_no): new_no-6 for new_no in range(7, 12)}
    all_bins, all_centrals, all_uncs = read_table(table_no)
    old_pt_bins = read_old_bins(old_table_of[str(table_no)])

    # an entry survives when its pT dict (min, mid, max) equals one of the old bins
    survivors = [
        (kin_bin, central, unc)
        for kin_bin, central, unc in zip(all_bins, all_centrals, all_uncs)
        if kin_bin['pT'] in old_pt_bins
    ]
    bins_int = [kin_bin for kin_bin, _, _ in survivors]
    centrals_int = [central for _, central, _ in survivors]
    unc_int = [unc for _, _, unc in survivors]

    if show_bins_to_delete:
        shared_pt_bins = [kin_bin['pT'] for kin_bin in bins_int]
        print(f'from grid number {old_table_of[str(table_no)]}, one needs to remove:')
        for old_pt_bin in old_pt_bins:
            if old_pt_bin in shared_pt_bins:
                continue
            print(old_pt_bin)

    return bins_int, centrals_int, unc_int



def main_filter() -> None:
    '''
    main filter that reads all the tables and saves the dataset in .yaml files

    Writes, in the current working directory:
        kinematics_R07.yaml     the kinematic bins
        data_R07.yaml           the central values
        uncertainties_R07.yaml  the uncertainty definitions and per-bin values
    and prints the number of datapoints together with a consistency check.

    The files are written with ``yaml.dump`` rather than ``yaml.safe_dump``:
    the module-level ``yaml.add_representer(float, prettify_float)`` registers
    on PyYAML's default ``Dumper``, which ``safe_dump`` (``SafeDumper``) does
    not use, so with ``safe_dump`` the float representer was never applied.
    '''

    tables = read_metadata()
    kinematics = []
    data_central = []
    uncertainties = []
    for table_no in tables:
        current_bins, current_central, current_unc = build_intersection(table_no)
        kinematics += current_bins
        data_central += current_central
        uncertainties += current_unc

    with open('kinematics_R07.yaml', 'w') as file:
        yaml.dump({'bins': kinematics}, file, sort_keys=False)

    # the central values are converted to plain floats before dumping so that
    # the representer registered for ``float`` applies to them
    data_central_float = [float(central_value) for central_value in data_central]
    with open('data_R07.yaml', 'w') as file:
        yaml.dump({'data_central': data_central_float}, file, sort_keys=False)

    unc_definitions = {
        'definitions': {
            'sys': {
                'description': 'combined systematic ucertainties (symmetrised), including JES correction, pT resolution, luminosity',
                'treatment': 'MULT',
                'type': 'CORR',
            },
            'stat': {
                'description': 'combined statistical uncertainties',
                'treatment': 'ADD',
                'type': 'UNCORR',
            },
        }
    }

    with open('uncertainties_R07.yaml', 'w') as file:
        yaml.dump(unc_definitions | {'bins': uncertainties}, file, sort_keys=False)

    print(f'number of datapoints: {len(kinematics)}')
    if len(kinematics) == len(data_central) == len(uncertainties):
        print('the number of bins is consistent across files')
    else:
        print('number of bins is inconsistent')

# Run the filter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main_filter()
Loading
Loading