Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a331e04
added method that returns a new instance with modified central values…
Dec 13, 2022
beca837
added method to load validphys.core.CommonData from validphys.core.Co…
Dec 13, 2022
e192f80
added functions to write commondata and systype files and function to…
Dec 13, 2022
7e465c5
added functions used to generate pseudo data for closure tests
Dec 13, 2022
e4c8097
added test for validphys.pseudodata.make_level0_data function
Dec 13, 2022
61a9889
generation of level1 data done by make_replica function. Random seed …
Dec 13, 2022
7bf9018
import logging module within commondataparser
Dec 13, 2022
d9c2499
test_make_level0 data updated
Dec 13, 2022
c2af92b
theory 162 added
Dec 14, 2022
19d1f63
.
Dec 14, 2022
0a7442d
make_level0_data test done with theoryid 162
Dec 14, 2022
c1e5a16
added description to make_level1_data
Dec 14, 2022
b2e3a2c
added method to load list of validphys.coredata.CommonData instances …
Dec 14, 2022
ab0ca4c
list of commondata loaded with new DataGroupSpec method
Dec 14, 2022
c4dc48a
method name changed
Dec 14, 2022
cd75cc7
name of DataGroupSpec method changed
Dec 14, 2022
33d2ce9
reset_index of commondata tables
Dec 15, 2022
9a859b5
deleted test_filter_rebuild_closure_data.csv
Dec 15, 2022
eb438f9
regressions/test_filter_rebuild_closure_data.csv file updated
Dec 15, 2022
2c1abaa
bug in sytypes file name fixed
Dec 16, 2022
2c7f128
added functions to write commondata tables to files
Dec 16, 2022
16760bf
import new validphys.commondataparser functions to write commondata t…
Dec 16, 2022
9b8f5bc
added single_dataset
Dec 16, 2022
a1f9689
import info from conftest.py
Dec 16, 2022
d782d7a
unusued fakeset loaded with c++ removed
Dec 28, 2022
0ce9470
added functions to write commondata and systype data to buffer
Dec 28, 2022
66821be
write commondata and systype using commondataparser functions
Dec 28, 2022
6e86c5a
comment using numpy doc style
Dec 29, 2022
bd6be97
use assert_allclose from numpy.testing for arrays
comane Dec 31, 2022
a8d2055
use experiments_index to index level1 data in make_level1_data
comane Jan 4, 2023
f2a9624
use experiments_index provider
comane Jan 4, 2023
7e827c6
added commondata_wc provider to get commondata with cuts list given D…
comane Jan 4, 2023
3795663
make_level0_data renamed to level0_commondata_wc
comane Jan 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

from validphys.core import peek_commondata_metadata
from validphys.coredata import CommonData
import logging

log = logging.getLogger(__name__)

def load_commondata(spec):
"""
Expand Down Expand Up @@ -89,3 +92,81 @@ def parse_systypes(systypefile):
systypetable.set_index("sys_index", inplace=True)

return systypetable



def write_commondata_data(commondata, buffer):
    """
    Write the commondata table of ``commondata`` to ``buffer``.

    A header line ``"<setname> <nsys> <ndata>"`` is written first, followed
    by the rows of ``commondata.commondata_table`` (index included) as
    tab-separated values, without a row of column names.

    Parameters
    ----------
    commondata : validphys.coredata.CommonData
        Object providing ``setname``, ``nsys``, ``ndata`` and
        ``commondata_table``.

    buffer : file-like object
        Anything exposing a text ``write`` method, e.g. a memory map,
        a compressed archive or an ``io.StringIO`` object.

    Example
    -------
    >>> from validphys.loader import Loader
    >>> from io import StringIO

    >>> l = Loader()
    >>> cd = l.check_commondata("NMC").load_commondata_instance()
    >>> sio = StringIO()
    >>> write_commondata_data(cd, sio)
    >>> print(sio.getvalue())

    """
    buffer.write(f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n")
    # ``header=False`` is the documented way to suppress the column names;
    # ``None`` only worked because it happens to be falsy.
    commondata.commondata_table.to_csv(buffer, sep="\t", header=False)

def write_commondata_to_file(commondata, path):
    """
    Write the commondata table of ``commondata`` to the file at ``path``.

    The file is opened in text write mode, so an existing file is
    overwritten; the content is produced by ``write_commondata_data``.
    """
    with open(path, mode="w") as stream:
        write_commondata_data(commondata, stream)

def write_systype_data(commondata, buffer):
    """
    Write the systype table of ``commondata`` to ``buffer``.

    A header line containing ``commondata.nsys`` is written first, followed
    by the rows of ``commondata.systype_table`` (index included) as
    tab-separated values, without a row of column names.

    Parameters
    ----------
    commondata : validphys.coredata.CommonData
        Object providing ``nsys`` and ``systype_table``.

    buffer : file-like object
        Anything exposing a text ``write`` method, e.g. a memory map,
        a compressed archive or an ``io.StringIO`` object.

    Example
    -------
    >>> from validphys.loader import Loader
    >>> from io import StringIO

    >>> l = Loader()
    >>> cd = l.check_commondata("NMC").load_commondata_instance()
    >>> sio = StringIO()
    >>> write_systype_data(cd, sio)
    >>> print(sio.getvalue())

    """
    buffer.write(f"{commondata.nsys}\n")
    # ``header=False`` is the documented way to suppress the column names;
    # ``None`` only worked because it happens to be falsy.
    commondata.systype_table.to_csv(buffer, sep="\t", header=False)

def write_systype_to_file(commondata, path):
    """
    Write the systype table of ``commondata`` to the file at ``path``.

    The file is opened in text write mode, so an existing file is
    overwritten; the content is produced by ``write_systype_data``.
    """
    with open(path, mode="w") as stream:
        write_systype_data(commondata, stream)
20 changes: 20 additions & 0 deletions validphys2/src/validphys/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,13 @@ def load(self)->CommonData:
#TODO: Use better path handling in python 3.6
return CommonData.ReadFile(str(self.datafile), str(self.sysfile))

def load_commondata_instance(self):
    """
    Load the commondata described by this spec with the python parser.

    Delegates to ``validphys.commondataparser.load_commondata`` and returns
    its result unchanged (presumably a ``validphys.coredata.CommonData``;
    confirm against the parser).
    """
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import between core and commondataparser; confirm.
    from validphys import commondataparser

    return commondataparser.load_commondata(self)

@property
def plot_kinlabels(self):
return get_plot_kinlabels(self)
Expand Down Expand Up @@ -656,6 +663,19 @@ def load(self):
def load_commondata(self):
    """
    Return a list holding the result of ``load_commondata()`` for every
    entry of ``self.datasets``, in the same order.
    """
    loaded = []
    for dataset in self.datasets:
        loaded.append(dataset.load_commondata())
    return loaded

def load_commondata_instance(self):
    """
    Load the commondata of every dataset in ``self.datasets``.

    For each dataset, ``dataset.commondata.load_commondata_instance()`` is
    called. When ``dataset.cuts`` is not ``None`` the loaded object is
    further restricted with ``with_cuts(dataset.cuts.load())``.

    Returns
    -------
    list
        One loaded commondata object per dataset, in dataset order
        (presumably ``validphys.coredata.CommonData`` instances).
    """
    loaded = []
    for ds in self.datasets:
        cuts = ds.cuts
        cd = ds.commondata.load_commondata_instance()
        if cuts is not None:
            # apply the cuts only when the dataset defines some
            cd = cd.with_cuts(cuts.load())
        loaded.append(cd)
    return loaded

@property
def thspec(self):
#TODO: Is this good enough? Should we explicitly pass the theory
Expand Down
5 changes: 5 additions & 0 deletions validphys2/src/validphys/coredata.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ def with_cuts(self, cuts):
def central_values(self):
    """Return the ``"data"`` column of ``self.commondata_table``."""
    table = self.commondata_table
    return table["data"]

def with_central_value(self, cv):
    """
    Return a new instance whose ``"data"`` column is set to ``cv``.

    The table is copied before the assignment, so the
    ``commondata_table`` of this instance is left untouched. All other
    fields are carried over through ``dataclasses.replace``.
    """
    updated_table = self.commondata_table.copy()
    updated_table["data"] = cv
    return dataclasses.replace(self, commondata_table=updated_table)

@property
def stat_errors(self):
    """Return the ``"stat"`` column of ``self.commondata_table``."""
    table = self.commondata_table
    return table["stat"]
Expand Down
104 changes: 91 additions & 13 deletions validphys2/src/validphys/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int):
RandomGenerator.GetRNG().SetSeed(filterseed)

@check_positive('errorsize')
def filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize, prepare_nnpdf_rng):
def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize):
"""Filter closure data. In addition to cutting data points, the data is
generated from an underlying ``fakepdf``, applying a shift to the data
if ``fakenoise`` is ``True``, which emulates the experimental central values
Expand All @@ -103,12 +103,13 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize, prepar
"""
log.info('Filtering closure-test data.')
return _filter_closure_data(
filter_path, data, fakepdf, fakenoise, errorsize)
filter_path, data, fakepdf, fakenoise, filterseed, errorsize)


@check_positive("errorsize")
def filter_closure_data_by_experiment(
filter_path, experiments_data, fakepdf, fakenoise, errorsize, prepare_nnpdf_rng,
filter_path, experiments_data, fakepdf, fakenoise, filterseed
, errorsize, experiments_index
):
"""
Like :py:func:`filter_closure_data` except filters data by experiment.
Expand All @@ -119,10 +120,14 @@ def filter_closure_data_by_experiment(
not reproducible.

"""
return [
_filter_closure_data(filter_path, exp, fakepdf, fakenoise, errorsize)
for exp in experiments_data
]

res = []
for exp in experiments_data:
experiment_index = experiments_index[experiments_index.isin([exp.name],level=0)]
res.append(_filter_closure_data(filter_path, exp, fakepdf, fakenoise,
filterseed, errorsize, experiment_index))

return res


def filter_real_data(filter_path, data):
Expand Down Expand Up @@ -157,6 +162,8 @@ def _write_ds_cut_data(path, dataset):

def _filter_real_data(filter_path, data):
"""Filter real experimental data."""


total_data_points = 0
total_cut_data_points = 0
for dataset in data.datasets:
Expand All @@ -168,24 +175,95 @@ def _filter_real_data(filter_path, data):
return total_data_points, total_cut_data_points


def _filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize):
"""Filter closure test data."""
def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize, experiments_index):
"""
This function is accessed within a closure test only, that is, the fakedata
namespace has to be True (If fakedata = False, the _filter_real_data function
will be used to write the commondata files).

The function writes commondata and systypes files within the
name_closure_test/filter folder.
If fakenoise is True, Level 1 type data is written to the filter folder, otherwise
Level 0 data is written.

Level 1 data is generated from the Level 0 data by adding noise sampled from
the experimental covariance matrix using the validphys.pseudodata.make_replica
function.

Parameters
----------

filter_path : str
path to filter folder

data : validphys.core.DataGroupSpec

fakepdf : validphys.core.PDF

fakenoise : bool
if fakenoise perform level1 shift of central data values

filterseed : int
random seed used for the generation of
random noise added to Level 0 data

errorsize : float
(defined in runcard)

experiments_index : pandas.MultiIndex


Returns
-------
tuple
total data points and points passing the cuts

"""

total_data_points = 0
total_cut_data_points = 0
fakeset = fakepdf.legacy_load()
# Load data, don't cache result
loaded_data = data.load.__wrapped__(data)
# generate level 1 shift if fakenoise
loaded_data.MakeClosure(fakeset, fakenoise)

from validphys.pseudodata import level0_commondata_wc
level0_commondata_instances_wc = level0_commondata_wc(data,fakepdf)
commondata_instances_wc = data.load_commondata_instance() # used to generate experimental covariance matrix

for j, dataset in enumerate(data.datasets):
#== print number of points passing cuts, make dataset directory and write FKMASK ==#
path = filter_path / dataset.name
nfull, ncut = _write_ds_cut_data(path, dataset)
make_dataset_dir(path / "systypes")
total_data_points += nfull
total_cut_data_points += ncut
# Rescale errors
loaded_ds = loaded_data.GetSet(j)
if errorsize != 1.0:
loaded_ds.RescaleErrors(errorsize)
loaded_ds.Export(str(path))

from validphys.commondataparser import write_commondata_to_file, write_systype_to_file
if not fakenoise:
#======= Level 0 closure test =======#
log.info("Writing Level0 data")
for l0_cd in level0_commondata_instances_wc:
path_cd = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat"
path_sys = filter_path / l0_cd.setname / "systypes" / f"SYSTYPE_{l0_cd.setname}_DEFAULT.dat"
write_commondata_to_file(commondata=l0_cd,path=path_cd)
write_systype_to_file(commondata=l0_cd,path=path_sys)

else:
#======= Level 1 closure test =======#
from validphys.pseudodata import make_level1_data
level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc,
filterseed, experiments_index)
#====== write commondata and systype files ======#
log.info("Writing Level1 data")
for l1_cd in level1_commondata_instances_wc:
path_cd = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat"
path_sys = filter_path / l1_cd.setname / "systypes" / f"SYSTYPE_{l1_cd.setname}_DEFAULT.dat"
write_commondata_to_file(commondata=l1_cd,path=path_cd)
write_systype_to_file(commondata=l1_cd,path=path_sys)

return total_data_points, total_cut_data_points


Expand Down
Loading