From 6d1997cae898b41b8ad3d408046083b1489fd6ab Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 15 Dec 2022 15:46:18 +0100 Subject: [PATCH 1/7] use python commondata instead of libNNPDF --- validphys2/src/validphys/commondataparser.py | 101 +++++++++++++++--- validphys2/src/validphys/core.py | 61 +++-------- validphys2/src/validphys/coredata.py | 43 ++++++-- validphys2/src/validphys/filters.py | 54 +++++++--- validphys2/src/validphys/fitdata.py | 4 +- validphys2/src/validphys/mc_gen.py | 1 + validphys2/src/validphys/plotoptions/core.py | 3 +- validphys2/src/validphys/tests/test_loader.py | 4 +- .../src/validphys/tests/test_weights.py | 12 ++- 9 files changed, 195 insertions(+), 88 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index ab2cbaf8c5..bf6966b13d 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -2,17 +2,50 @@ This module implements parsers for commondata and systype files into useful datastructures, contained in the :py:mod:`validphys.coredata` module, which are not backed by C++ managed memory, and so they can be easily pickled and -interfaces with common Python libraries. The integration of these objects into -the codebase is currently work in progress, and at the moment this module -serves as a proof of concept. +interfaces with common Python libraries. + +The validphys commondata structure is an instance of :py:class:`validphys.coredata.CommonData` """ +from collections import namedtuple from operator import attrgetter +import logging import pandas as pd -from validphys.core import peek_commondata_metadata from validphys.coredata import CommonData +log = logging.getLogger(__name__) + +kinlabels_latex = { + "DIJET": ("\\eta", "$\\m_{1,2} (GeV)", "$\\sqrt{s} (GeV)"), + "DIS": ("$x$", "$Q^2 (GeV^2)$", "$y$"), + "DYP": ("$y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_JPT": ("$p_T (GeV)$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_JRAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_MLL": ("$M_{ll} (GeV)$", "$M_{ll}^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_PT": ("$p_T (GeV)$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_PTRAP": ("$\\eta/y$", "$p_T^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWJ_RAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWK_MLL": ("$M_{ll} (GeV)$", "$M_{ll}^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWK_PT": ("$p_T$ (GeV)", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWK_PTRAP": ("$\\eta/y$", "$p_T^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "EWK_RAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HIG_RAP": ("$y$", "$M_H^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HQP_MQQ": ("$M^{QQ} (GeV)$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HQP_PTQ": ("$p_T^Q (GeV)$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HQP_PTQQ": ("$p_T^{QQ} (GeV)$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HQP_YQ": ("$y^Q$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "HQP_YQQ": ("$y^{QQ}$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "INC": ("$0$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "JET": ("$\\eta$", "$p_T^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "PHT": ("$\\eta_\\gamma$", "$E_{T,\\gamma}^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), + "SIA": ("$z$", "$Q^2 (GeV^2)$", "$y$"), +} + + +_kinlabels_keys = sorted(kinlabels_latex, key=len, reverse=True) + + def load_commondata(spec): """ Load the data corresponding to a CommonDataSpec object. @@ -42,12 +75,12 @@ def parse_commondata(commondatafile, systypefile, setname): and systype files. """ # First parse commondata file - commondatatable = pd.read_csv(commondatafile, sep=r'\s+', skiprows=1, header=None) + commondatatable = pd.read_csv(commondatafile, sep=r"\s+", skiprows=1, header=None) # Remove NaNs # TODO: replace commondata files with bad formatting # Build header - commondataheader = ['entry', 'process', 'kin1', 'kin2', 'kin3', 'data', 'stat'] - nsys = (commondatatable.shape[1] - len(commondataheader)) // 2 + commondataheader = ["entry", "process", "kin1", "kin2", "kin3", "data", "stat"] + nsys = (commondatatable.shape[1] - len(commondataheader)) // 2 commondataheader += ["ADD", "MULT"] * nsys commondatatable.columns = commondataheader @@ -55,8 +88,8 @@ def parse_commondata(commondatafile, systypefile, setname): ndata = len(commondatatable) commondataproc = commondatatable["process"][1] # Check for consistency with commondata metadata - cdmetadata = peek_commondata_metadata(commondatafile) - if (setname, nsys, ndata) != attrgetter('name', 'nsys', 'ndata')(cdmetadata): + cdmetadata = peek_commondata_metadata(commondatafile) + if (setname, nsys, ndata) != attrgetter("name", "nsys", "ndata")(cdmetadata): raise ValueError("Commondata table information does not match metadata") # Now parse the systype file @@ -70,18 +103,18 @@ def parse_commondata(commondatafile, systypefile, setname): nkin=3, nsys=nsys, commondata_table=commondatatable, - systype_table=systypetable + systype_table=systypetable, ) + def parse_systypes(systypefile): - """Parses a systype file and returns a pandas dataframe. - """ + """Parses a systype file and returns a pandas dataframe.""" systypeheader = ["sys_index", "type", "name"] try: systypetable = pd.read_csv( systypefile, sep=r"\s+", names=systypeheader, skiprows=1, header=None ) - systypetable.dropna(axis='columns', inplace=True) + systypetable.dropna(axis="columns", inplace=True) # Some datasets e.g. CMSWCHARMRAT have no systematics except pd.errors.EmptyDataError: systypetable = pd.DataFrame(columns=systypeheader) @@ -89,3 +122,45 @@ def parse_systypes(systypefile): systypetable.set_index("sys_index", inplace=True) return systypetable + + +CommonDataMetadata = namedtuple("CommonDataMetadata", ("name", "nsys", "ndata", "process_type")) + + +def peek_commondata_metadata(commondatafilename): + """Check some basic properties commondata object without going though the + trouble of processing it on the C++ side""" + with open(commondatafilename) as f: + try: + l = f.readline() + name, nsys_str, ndata_str = l.split() + l = f.readline() + process_type_str = l.split()[1] + except Exception: + log.error(f"Error processing {commondatafilename}") + raise + + return CommonDataMetadata( + name, int(nsys_str), int(ndata_str), get_kinlabel_key(process_type_str) + ) + + +def get_plot_kinlabels(commondata): + """Return the LaTex kinematic labels for a given Commondata""" + key = commondata.process_type + + return kinlabels_latex[key] + + +def get_kinlabel_key(process_label): + # Since there is no 1:1 correspondence between latex keys and GetProc, + # we match the longest key such that the proc label starts with it. + l = process_label + try: + return next(k for k in _kinlabels_keys if l.startswith(k)) + except StopIteration as e: + raise ValueError( + "Could not find a set of kinematic " + "variables matching the process %s Check the " + "labels defined in commondata.cc. " % (l) + ) from e diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 694135e3ae..98d5fe8990 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -8,7 +8,6 @@ """ from __future__ import generator_stop -from collections import namedtuple import re import enum import functools @@ -24,7 +23,7 @@ from reportengine.compat import yaml from NNPDF import (LHAPDFSet as libNNPDF_LHAPDFSet, - CommonData, + CommonData as LegacyCommonData, FKTable, FKSet, DataSet, @@ -41,6 +40,9 @@ from validphys.lhapdfset import LHAPDFSet from validphys.fkparser import load_fktable from validphys.pineparser import pineappl_reader +from validphys.commondataparser import (peek_commondata_metadata, + get_plot_kinlabels, + parse_commondata,) log = logging.getLogger(__name__) @@ -234,46 +236,6 @@ def get_members(self): return len(self) -kinlabels_latex = CommonData.kinLabel_latex.asdict() -_kinlabels_keys = sorted(kinlabels_latex, key=len, reverse=True) - - -def get_plot_kinlabels(commondata): - """Return the LaTex kinematic labels for a given Commondata""" - key = commondata.process_type - - return kinlabels_latex[key] - -def get_kinlabel_key(process_label): - #Since there is no 1:1 correspondence between latex keys and GetProc, - #we match the longest key such that the proc label starts with it. - l = process_label - try: - return next(k for k in _kinlabels_keys if l.startswith(k)) - except StopIteration as e: - raise ValueError("Could not find a set of kinematic " - "variables matching the process %s Check the " - "labels defined in commondata.cc. " % (l)) from e - -CommonDataMetadata = namedtuple('CommonDataMetadata', ('name', 'nsys', 'ndata', 'process_type')) - -def peek_commondata_metadata(commondatafilename): - """Check some basic properties commondata object without going though the - trouble of processing it on the C++ side""" - with open(commondatafilename) as f: - try: - l = f.readline() - name, nsys_str, ndata_str = l.split() - l = f.readline() - process_type_str = l.split()[1] - except Exception: - log.error(f"Error processing {commondatafilename}") - raise - - return CommonDataMetadata(name, int(nsys_str), int(ndata_str), - get_kinlabel_key(process_type_str)) - - class CommonDataSpec(TupleComp): def __init__(self, datafile, sysfile, plotfiles, name=None, metadata=None): self.datafile = datafile @@ -312,9 +274,11 @@ def __iter__(self): return iter((self.datafile, self.sysfile, self.plotfiles)) @functools.lru_cache() - def load(self)->CommonData: - #TODO: Use better path handling in python 3.6 - return CommonData.ReadFile(str(self.datafile), str(self.sysfile)) + def load(self): + cd = parse_commondata(self.datafile, self.sysfile, self.name) +# cd_old = LegacyCommonData.ReadFile(str(self.datafile), str(self.sysfile)) +# cd.old = cd_old # DEBUG + return cd @property def plot_kinlabels(self): @@ -472,7 +436,8 @@ def __init__(self, *, name, commondata, fkspecs, thspec, cuts, @functools.lru_cache() def load(self): - cd = self.commondata.load() + """Load the libNNPDF version of the dataset""" + cd = LegacyCommonData.ReadFile(str(self.commondata.datafile), str(self.commondata.sysfile)) fktables = [] for p in self.fkspecs: @@ -508,7 +473,9 @@ def load_commondata(self): loaded_cuts = self.cuts.load() if not (hasattr(loaded_cuts, '_full') and loaded_cuts._full): intmask = [int(ele) for ele in loaded_cuts] - cd = CommonData(cd, intmask) +# cd_old = LegacyCommonData(cd.old, intmask) + cd = cd.with_cuts(intmask) +# cd.old = cd_old # DEBUG return cd def to_unweighted(self): diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 5e73e3777f..b27b7ce657 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -8,6 +8,8 @@ import numpy as np import pandas as pd +KINNAMES = ["kin1", "kin2", "kin3"] + @dataclasses.dataclass(eq=False) class FKTableData: @@ -175,6 +177,7 @@ def get_np_fktable(self): return fktable + @dataclasses.dataclass(eq=False) class CFactorData: """ @@ -218,15 +221,9 @@ class CommonData: nkin : int Number of kinematics specified - kinematics : list of str with length nkin - Kinematic variables kin1, kin2, kin3 ... - nsys : int Number of systematics - sysid : list of str with length nsys - ID for systematic - commondata_table : pd.DataFrame Pandas dataframe containing the commondata @@ -235,6 +232,9 @@ class CommonData: for each systematic alongside the uncertainty type (ADD/MULT/RAND) and name (CORR/UNCORR/THEORYCORR/SKIP) + + systematics_table: pd.DataFrame + Panda dataframe containing the table of systematics """ setname: str @@ -248,7 +248,7 @@ class CommonData: def __post_init__(self): self.systematics_table = self.commondata_table.drop( - columns=["process", "kin1", "kin2", "kin3", "data", "stat"] + columns=["process", "data", "stat"] + KINNAMES ) def with_cuts(self, cuts): @@ -284,10 +284,20 @@ def with_cuts(self, cuts): new_commondata_table = self.commondata_table.loc[cuts] return dataclasses.replace(self, ndata=newndata, commondata_table=new_commondata_table) + @property + def kinematics(self): + return self.commondata_table[KINNAMES] + + def get_kintable(self): + return self.kinematics.values + @property def central_values(self): return self.commondata_table["data"] + def get_cv(self): + return self.central_values.values + @property def stat_errors(self): return self.commondata_table["stat"] @@ -353,3 +363,22 @@ def systematic_errors(self, central_values=None): central_values = self.central_values.to_numpy() converted_mult_errors = self.multiplicative_errors * central_values[:, np.newaxis] / 100 return pd.concat((self.additive_errors, converted_mult_errors), axis=1) + + def export(self, path): + """Export the data, and error types + Use the same format as libNNPDF: + + - A DATA_.dat file with the dataframe of accepted points + - A systypes/STYPES_.dat file with the error types + """ + dat_path = path / f"DATA_{self.setname}.dat" + sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat" + sys_path.parent.mkdir(exist_ok=True) + + dat_string_raw = self.commondata_table.to_string(index=False, header=False, float_format="{:.8e}".format) + header = f"{self.setname} {self.nsys} {self.ndata}" + dat_string = "\n".join([f" {i+1} {r}" for i, r in enumerate(dat_string_raw.split("\n"))]) + dat_path.write_text(f"{header}\n{dat_string}\n") + + sys_raw = self.systype_table.to_string(index=True, header=False, index_names=False) + sys_path.write_text(f"{self.nsys}\n{sys_raw}\n") diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 913f883e8b..6fc5b76f4f 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -9,13 +9,39 @@ import numpy as np -from NNPDF import CommonData from reportengine.checks import make_argcheck, check, check_positive, make_check from reportengine.compat import yaml import validphys.cuts log = logging.getLogger(__name__) +KIN_LABEL = { + "DIS": ("x", "Q2", "y"), + "DYP": ("y", "M2", "sqrts"), + "JET": ("eta", "p_T2", "sqrts"), + "DIJET": ("eta", "m_12", "sqrts"), + "PHT": ("eta_gamma", "E_{T,gamma)2", "sqrts"), + "INC": ("0", "mu2", "sqrts"), + "EWK_RAP": ("etay", "M2", "sqrts"), + "EWK_PT": ("p_T", "M2", "sqrts"), + "EWK_PTRAP": ("etay", "p_T2", "sqrts"), + "EWK_MLL": ("M_ll", "M_ll2", "sqrts"), + "EWJ_RAP": ("etay", "M2", "sqrts"), + "EWJ_PT": ("p_T", "M2", "sqrt(s)"), + "EWJ_PTRAP": ("etay", "p_T2", "sqrts"), + "EWJ_JRAP": ("etay", "M2", "sqrts"), + "EWJ_JPT": ("p_T", "M2", "sqrts"), + "EWJ_MLL": ("M_ll", "M_ll2", "sqrts"), + "HQP_YQQ": ("yQQ", "mu2", "sqrts"), + "HQP_MQQ": ("MQQ", "mu2", "sqrts"), + "HQP_PTQQ": ("p_TQQ", "mu2", "sqrts"), + "HQP_YQ": ("yQ", "mu2", "sqrts"), + "HQP_PTQ": ("p_TQ", "mu2", "sqrts"), + "HIG_RAP": ("y", "M_H2", "sqrts"), + "SIA": ("z", "Q2", "y"), +} + + class RuleProcessingError(Exception): """Exception raised when we couldn't process a rule.""" @@ -164,7 +190,7 @@ def _filter_real_data(filter_path, data): nfull, ncut = _write_ds_cut_data(path, dataset) total_data_points += nfull total_cut_data_points += ncut - dataset.load_commondata().Export(str(path)) + dataset.load_commondata().export(path) return total_data_points, total_cut_data_points @@ -343,14 +369,14 @@ def __init__( f"Could not find dataset {self.dataset}" ) from e if cd.process_type[:3] == "DIS": - self.variables = CommonData.kinLabel["DIS"] + self.variables = KIN_LABEL["DIS"] else: - self.variables = CommonData.kinLabel[cd.process_type] + self.variables = KIN_LABEL[cd.process_type] else: if self.process_type[:3] == "DIS": - self.variables = CommonData.kinLabel["DIS"] + self.variables = KIN_LABEL["DIS"] else: - self.variables = CommonData.kinLabel[self.process_type] + self.variables = KIN_LABEL[self.process_type] if hasattr(self, "local_variables"): if not isinstance(self.local_variables, Mapping): @@ -422,19 +448,21 @@ def __hash__(self): return hash(self._properties) def __call__(self, dataset, idat): - central_value = dataset.GetData(idat) + central_value = dataset.get_cv()[idat] + process_name = dataset.commondataproc + # We return None if the rule doesn't apply. This # is different to the case where the rule does apply, # but the point was cut out by the rule. if ( - dataset.GetSetName() != self.dataset - and dataset.GetProc(idat) != self.process_type + dataset.setname != self.dataset + and process_name != self.process_type and self.process_type != "DIS_ALL" ): return None # Handle the generalised DIS cut - if self.process_type == "DIS_ALL" and dataset.GetProc(idat)[:3] != "DIS": + if self.process_type == "DIS_ALL" and not process_name.startswith("DIS"): return None ns = self._make_point_namespace(dataset, idat) @@ -468,7 +496,7 @@ def __repr__(self): # pragma: no cover def _make_kinematics_dict(self, dataset, idat) -> dict: """Fill in a dictionary with the kinematics for each point""" - kinematics = [dataset.GetKinematics(idat, j) for j in range(3)] + kinematics = dataset.kinematics.values[idat] return dict(zip(self.variables, kinematics)) def _make_point_namespace(self, dataset, idat) -> dict: @@ -488,7 +516,7 @@ def get_cuts_for_dataset(commondata, rules) -> list: Parameters ---------- - commondata: NNPDF CommonData spec + commondata: :py:class:`validphys.coredata.CommonData` rules: List[Rule] A list of Rule objects specifying the filters. @@ -515,7 +543,7 @@ def get_cuts_for_dataset(commondata, rules) -> list: dataset = commondata.load() mask = [] - for idat in range(dataset.GetNData()): + for idat in range(dataset.ndata): broken = False for rule in rules: rule_result = rule(dataset, idat) diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index d518efcccd..2cf35d561d 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -440,10 +440,10 @@ def print_systype_overlap(groups_commondata, group_dataset_inputs_by_metadata): systype_groups = dict() for group_cd, group in zip(groups_commondata, group_dataset_inputs_by_metadata): systype_groups[group["group_name"]] = { - cd.load().GetSys(0, i).name + cd.load().systype_table.iloc[i]["name"] for cd in group_cd for i in range(cd.nsys) - if cd.load().GetSys(0, i).name not in allow_list + if cd.load().systype_table.iloc[i]["name"] not in allow_list } systype_overlap = set() diff --git a/validphys2/src/validphys/mc_gen.py b/validphys2/src/validphys/mc_gen.py index ff68d59c88..bf94b5b478 100644 --- a/validphys2/src/validphys/mc_gen.py +++ b/validphys2/src/validphys/mc_gen.py @@ -5,6 +5,7 @@ Tools to check the pseudo-data MC generation. """ # The functions in this module have been ported to not use libNNPDF +# but is still using it under the hood # it has been a direct port of the libnnpdf dependent structure # so they should not be used as an example import logging diff --git a/validphys2/src/validphys/plotoptions/core.py b/validphys2/src/validphys/plotoptions/core.py index d96aef7290..cf0571b811 100644 --- a/validphys2/src/validphys/plotoptions/core.py +++ b/validphys2/src/validphys/plotoptions/core.py @@ -19,8 +19,9 @@ from reportengine.compat import yaml from reportengine.utils import get_functions, ChainMap -from NNPDF import CommonData, DataSet +from NNPDF import DataSet from validphys.core import CommonDataSpec, DataSetSpec, Cuts, InternalCutsWrapper +from validphys.coredata import CommonData from validphys.plotoptions.utils import apply_to_all_columns, get_subclasses from validphys.plotoptions import labelers, kintransforms, resulttransforms from validphys.utils import parse_yaml_inp diff --git a/validphys2/src/validphys/tests/test_loader.py b/validphys2/src/validphys/tests/test_loader.py index decf0a020c..4af8fcf1e8 100644 --- a/validphys2/src/validphys/tests/test_loader.py +++ b/validphys2/src/validphys/tests/test_loader.py @@ -42,8 +42,8 @@ def test_rebuild_commondata_without_cuts(tmp_path_factory, arg): cutpath = tmp / "cuts.txt" np.savetxt(cutpath, np.asarray(cuts, dtype=int), fmt="%u") cutspec = Cuts(cd, cutpath) - lcd = type(lcd)(lcd, cuts) - lcd.Export(str(tmp)) + lcd = lcd.with_cuts(cuts) + lcd.export(tmp) # We have to reconstruct the name here... with_cuts = tmp / f"DATA_{cd.name}.dat" newpath = tmp / "commondata.dat" diff --git a/validphys2/src/validphys/tests/test_weights.py b/validphys2/src/validphys/tests/test_weights.py index 0c1043fa44..134cf47e2f 100644 --- a/validphys2/src/validphys/tests/test_weights.py +++ b/validphys2/src/validphys/tests/test_weights.py @@ -8,9 +8,15 @@ def test_weights_have_same_commondata(weighted_data_witht0_config): data = API.data(**weighted_data_witht0_config) normal, weighted = data.datasets - normalds, weightedds = normal.load(), weighted.load() - assert normalds.GetSys(0, 0).mult == weightedds.GetSys(0, 0).mult - assert normalds.GetSys(0, 0).add == weightedds.GetSys(0, 0).add + normalds, weightedds = normal.load_commondata(), weighted.load_commondata() + assert ( + normalds.systematics_table["MULT"].iloc[0][0] + == weightedds.systematics_table["MULT"].iloc[0][0] + ) + assert ( + normalds.systematics_table["ADD"].iloc[0][0] + == weightedds.systematics_table["ADD"].iloc[0][0] + ) def test_chi2_arithmetic(weighted_data_witht0_internal_cuts_config): From e8e9e4497e5deae2613878cecfe9370bf69d8133 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 16 Dec 2022 10:23:27 +0100 Subject: [PATCH 2/7] update missing libnnpdf methods --- validphys2/src/validphys/dataplots.py | 7 ++++--- validphys2/src/validphys/theorycovariance/tests.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index 80e2709a7e..31b7fb2062 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -960,12 +960,13 @@ def plot_positivity(pdfs, positivity_predictions_for_pdfs, posdataset, pos_use_k ax.axhline(0, color='red') posset = posdataset.load_commondata() - ndata = posset.GetNData() + ndata = posset.ndata xvals = [] if pos_use_kin: - ax.set_xlabel('kin1') - xvals = [posset.GetKinematics(i, 0) for i in range(0, ndata)] + kin_name = "kin1" + ax.set_xlabel(kin_name) + xvals = posset.kinematics[kin_name].values else: ax.set_xlabel('idat') xvals = np.arange(ndata) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 1a90302cac..6dd14d85c2 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -219,7 +219,7 @@ def all_matched_data_lengths(all_matched_datasets): """Returns a list of the data sets lengths.""" lens = [] for rlist in all_matched_datasets: - lens.append(rlist[0].load_commondata().GetNData()) + lens.append(rlist[0].load_commondata().ndata) return lens From 609d6c862702ea19d09228f7ff038cbd3cb373c2 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Tue, 17 Jan 2023 12:48:49 +0400 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Alessandro Candido --- validphys2/src/validphys/core.py | 7 +------ validphys2/src/validphys/filters.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 98d5fe8990..59be563b0f 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -275,10 +275,7 @@ def __iter__(self): @functools.lru_cache() def load(self): - cd = parse_commondata(self.datafile, self.sysfile, self.name) -# cd_old = LegacyCommonData.ReadFile(str(self.datafile), str(self.sysfile)) -# cd.old = cd_old # DEBUG - return cd + return parse_commondata(self.datafile, self.sysfile, self.name) @property def plot_kinlabels(self): @@ -473,9 +470,7 @@ def load_commondata(self): loaded_cuts = self.cuts.load() if not (hasattr(loaded_cuts, '_full') and loaded_cuts._full): intmask = [int(ele) for ele in loaded_cuts] -# cd_old = LegacyCommonData(cd.old, intmask) cd = cd.with_cuts(intmask) -# cd.old = cd_old # DEBUG return cd def to_unweighted(self): diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 6fc5b76f4f..1669772a39 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -516,7 +516,7 @@ def get_cuts_for_dataset(commondata, rules) -> list: Parameters ---------- - commondata: :py:class:`validphys.coredata.CommonData` + commondata: validphys.coredata.CommonData rules: List[Rule] A list of Rule objects specifying the filters. From c8e9cf61fd240f97399349f9a856ae90f82e6ee1 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 17 Jan 2023 15:11:41 +0400 Subject: [PATCH 4/7] apply review comments --- validphys2/src/validphys/commondataparser.py | 29 ++++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index bf6966b13d..f4459dc0b1 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -6,7 +6,7 @@ The validphys commondata structure is an instance of :py:class:`validphys.coredata.CommonData` """ -from collections import namedtuple +import dataclasses from operator import attrgetter import logging @@ -16,7 +16,7 @@ log = logging.getLogger(__name__) -kinlabels_latex = { +KINLABEL_LATEX = { "DIJET": ("\\eta", "$\\m_{1,2} (GeV)", "$\\sqrt{s} (GeV)"), "DIS": ("$x$", "$Q^2 (GeV^2)$", "$y$"), "DYP": ("$y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"), @@ -43,9 +43,6 @@ } -_kinlabels_keys = sorted(kinlabels_latex, key=len, reverse=True) - - def load_commondata(spec): """ Load the data corresponding to a CommonDataSpec object. @@ -124,12 +121,18 @@ def parse_systypes(systypefile): return systypetable -CommonDataMetadata = namedtuple("CommonDataMetadata", ("name", "nsys", "ndata", "process_type")) +@dataclasses.dataclass +class CommonDataMetadata: + """Contains metadata information about the data being read""" + name: str + nsys: int + ndata: int + process_type: str def peek_commondata_metadata(commondatafilename): - """Check some basic properties commondata object without going though the - trouble of processing it on the C++ side""" + """Read some of the properties of the commondata object as a CommonData Metadata + """ with open(commondatafilename) as f: try: l = f.readline() @@ -149,15 +152,17 @@ def get_plot_kinlabels(commondata): """Return the LaTex kinematic labels for a given Commondata""" key = commondata.process_type - return kinlabels_latex[key] + return KINLABEL_LATEX[key] def get_kinlabel_key(process_label): - # Since there is no 1:1 correspondence between latex keys and GetProc, - # we match the longest key such that the proc label starts with it. + """ + Since there is no 1:1 correspondence between latex keys and GetProc, + we match the longest key such that the proc label starts with it. + """ l = process_label try: - return next(k for k in _kinlabels_keys if l.startswith(k)) + return next(k for k in sorted(KINLABEL_LATEX, key=len, reverse=True) if l.startswith(k)) except StopIteration as e: raise ValueError( "Could not find a set of kinematic " From 5ae445cb428969990cef2a0f04a49a7fa7f7bb4f Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Tue, 17 Jan 2023 15:46:06 +0400 Subject: [PATCH 5/7] Update validphys2/src/validphys/commondataparser.py --- validphys2/src/validphys/commondataparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index f4459dc0b1..7da1f038da 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -121,7 +121,7 @@ def parse_systypes(systypefile): return systypetable -@dataclasses.dataclass +@dataclasses.dataclass(frozen=True) class CommonDataMetadata: """Contains metadata information about the data being read""" name: str From a159219eb724bafdc252b7829beb34deb69225c7 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Tue, 17 Jan 2023 23:21:26 +0400 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Roy Stegeman --- validphys2/src/validphys/commondataparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 7da1f038da..1611a1db65 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -2,7 +2,7 @@ This module implements parsers for commondata and systype files into useful datastructures, contained in the :py:mod:`validphys.coredata` module, which are not backed by C++ managed memory, and so they can be easily pickled and -interfaces with common Python libraries. +interfaced with common Python libraries. The validphys commondata structure is an instance of :py:class:`validphys.coredata.CommonData` """ From df13d189c9fbab373494fe332841011eda0c4527 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 17 Jan 2023 23:29:58 +0400 Subject: [PATCH 7/7] kinnames -> kin_names --- validphys2/src/validphys/coredata.py | 6 +++--- validphys2/src/validphys/dataplots.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index b27b7ce657..2735606197 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -KINNAMES = ["kin1", "kin2", "kin3"] +KIN_NAMES = ["kin1", "kin2", "kin3"] @dataclasses.dataclass(eq=False) @@ -248,7 +248,7 @@ class CommonData: def __post_init__(self): self.systematics_table = self.commondata_table.drop( - columns=["process", "data", "stat"] + KINNAMES + columns=["process", "data", "stat"] + KIN_NAMES ) def with_cuts(self, cuts): @@ -286,7 +286,7 @@ def with_cuts(self, cuts): @property def kinematics(self): - return self.commondata_table[KINNAMES] + return self.commondata_table[KIN_NAMES] def get_kintable(self): return self.kinematics.values diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index 31b7fb2062..b1f54a0344 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -26,6 +26,7 @@ from validphys.plotoptions import get_info, kitable, transform_result from validphys import plotutils from validphys.utils import sane_groupby_iter, split_ranges, scale_from_grid +from validphys.coredata import KIN_NAMES log = logging.getLogger(__name__) @@ -964,7 +965,7 @@ def plot_positivity(pdfs, positivity_predictions_for_pdfs, posdataset, pos_use_k xvals = [] if pos_use_kin: - kin_name = "kin1" + kin_name = KIN_NAMES[0] ax.set_xlabel(kin_name) xvals = posset.kinematics[kin_name].values else: