Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fb3bd05
rules are tuple instead of list -> makes them hashable
comane May 10, 2024
df6a42b
return empty tuple instead of None -> None is not iterable
comane May 11, 2024
82cd476
added FilterDefaults frozen dataclass
comane May 11, 2024
90f04da
filter defaults returns FilterDefaults dataclass instead of unhashabl…
comane May 11, 2024
a9b7e1c
make inputs to produce_rules hashable types. no need for freeze args
comane May 11, 2024
a600a3f
import validphys.filters at top of module
comane May 12, 2024
a256b45
parse_filter_rules returns tuple of frozendicts
comane May 12, 2024
ad75add
check_default_filter_rules returns tuple
comane May 12, 2024
9d08c61
applied review changes
comane May 13, 2024
0c085e0
Introduced FilterRule and AddedFilterRule objects
comane May 13, 2024
2d022aa
removed frozendict import in config
comane May 13, 2024
fd9024f
applied comments of review
comane May 14, 2024
4bf928d
Update validphys2/src/validphys/config.py
comane May 14, 2024
d9e3c18
Update validphys2/src/validphys/config.py
comane May 14, 2024
d542c7d
Rule accepts both FilterRule objects as well as Mappings
comane May 14, 2024
7da9282
Update validphys2/src/validphys/filters.py
comane May 14, 2024
610a142
to_dict method of DefaultFilters also takes default None values
comane May 14, 2024
214a75a
restored previous behaviour of produce_defaults function
comane May 16, 2024
bbfe2c9
removed freeze args from get_cuts_for_dataset as args are now hashable
comane May 18, 2024
8858d30
removed freeze_args and make_hashable functions as they are not used …
comane May 18, 2024
feae109
removed frozendict dependency from pyproject and conda-recipe/meta.ya…
comane May 18, 2024
3d7f319
pass rules as tuple in test
comane May 18, 2024
c2b5659
Update validphys2/src/validphys/config.py
comane May 20, 2024
fac5218
Update validphys2/src/validphys/config.py
comane May 20, 2024
3fd9ec5
Merge branch 'master' into hashable_rules
comane May 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ requirements:
- pineappl >=0.7.3
- eko >=0.14.2
- fiatlux
- frozendict # needed for caching of data loading
- sphinx >=5.0.2,<6 # documentation. Needs pinning temporarily due to markdown
- recommonmark
- sphinx_rtd_theme >0.5
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ pandas = "*"
numpy = "*"
validobj = "*"
prompt_toolkit = "*"
frozendict = "*" # validphys: needed for caching of data loading
# Reportengine needs to be installed from git
reportengine = { git = "https://github.com/NNPDF/reportengine" }
# Fit
Expand Down
82 changes: 55 additions & 27 deletions validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numbers
import pathlib

from frozendict import frozendict
import pandas as pd

from nnpdf_data import legacy_to_new_map
Expand Down Expand Up @@ -41,7 +40,15 @@
from validphys.paramfits.config import ParamfitsConfig
from validphys.plotoptions.core import get_info
import validphys.scalevariations
from validphys.utils import freeze_args
from validphys.filters import (FilterDefaults,
AddedFilterRule,
FilterRule,
default_filter_settings_input,
Rule,
RuleProcessingError,
default_filter_rules_input
)


log = logging.getLogger(__name__)

Expand Down Expand Up @@ -1258,10 +1265,10 @@ def load_default_default_filter_rules(self, spec):
)

def parse_filter_rules(self, filter_rules: (list, type(None))):
"""A list of filter rules. See https://docs.nnpdf.science/vp/filters.html
for details on the syntax"""
"""A tuple of FilterRule objects. Rules are immutable after parsing.
See https://docs.nnpdf.science/vp/filters.html for details on the syntax"""
log.warning("Overwriting filter rules")
return filter_rules
return tuple(FilterRule(**rule) for rule in filter_rules) if filter_rules else None

def parse_default_filter_rules_recorded_spec_(self, spec):
"""This function is a hacky fix for parsing the recorded spec
Expand All @@ -1271,12 +1278,12 @@ def parse_default_filter_rules_recorded_spec_(self, spec):
return spec

def parse_added_filter_rules(self, rules: (list, type(None)) = None):
return rules
"""
Returns a tuple of AddedFilterRule objects. Rules are immutable after parsing.
AddedFilterRule objects inherit from FilterRule objects.
Comment thread
scarlehoff marked this conversation as resolved.
"""
return tuple(AddedFilterRule(**rule) for rule in rules) if rules else None

# Every parallel replica triggers a series of calls to this function,
# which should not happen since the rules are identical among replicas.
# E.g for NNPDF4.0 with 2 parallel replicas 693 calls, 3 parallel replicas 1001 calls...
@freeze_args
Comment thread
comane marked this conversation as resolved.
@functools.lru_cache
def produce_rules(
self,
Expand All @@ -1286,10 +1293,9 @@ def produce_rules(
default_filter_rules=None,
filter_rules=None,
default_filter_rules_recorded_spec_=None,
added_filter_rules: (list, type(None)) = None,
added_filter_rules: (tuple, type(None)) = None,
):
"""Produce filter rules based on the user defined input and defaults."""
from validphys.filters import Rule, RuleProcessingError, default_filter_rules_input

theory_parameters = theoryid.get_description()

Expand Down Expand Up @@ -1317,8 +1323,7 @@ def produce_rules(

if added_filter_rules:
for i, rule in enumerate(added_filter_rules):
if not isinstance(rule, (dict, frozendict)):
raise ConfigError(f"added rule {i} is not a dict")

try:
rule_list.append(
Rule(
Expand All @@ -1331,7 +1336,7 @@ def produce_rules(
except RuleProcessingError as e:
raise ConfigError(f"Error processing added rule {i}: {e}") from e

return rule_list
return tuple(rule_list)

@configparser.record_from_defaults
def parse_default_filter_settings(self, spec: (str, type(None))):
Expand Down Expand Up @@ -1359,45 +1364,66 @@ def load_default_default_filter_settings(self, spec):
def parse_filter_defaults(self, filter_defaults: (dict, type(None))):
"""A mapping containing the default kinematic limits to be used when
filtering data (when using internal cuts).
Currently these limits are ``q2min`` and ``w2min``.
Currently these limits are ``q2min``, ``w2min``, and ``maxTau``.

Parameters
----------
filter_defaults: dict, None
A mapping containing the default kinematic limits to be used when
filtering data (when using internal cuts).
Currently these limits are ``q2min``, ``w2min``, and ``maxTau``.

Returns
-------
FilterDefaults
A hashable object containing the default kinematic limits to be used when
filtering data (when using internal cuts).
Currently these limits are ``q2min``, ``w2min``, and ``maxTau``.
"""
log.warning("Overwriting filter defaults")
return filter_defaults
parsed_filter_defaults = FilterDefaults(**filter_defaults)
return parsed_filter_defaults

def produce_defaults(
self,
q2min=None,
w2min=None,
maxTau=None,
default_filter_settings=None,
filter_defaults={},
filter_defaults=None,
default_filter_settings_recorded_spec_=None,
):
"""Produce default values for filters taking into account the
values of ``q2min``, ``w2min`` and ``maxTau`` defined at namespace
level and those inside a ``filter_defaults`` mapping.

Within this function the hashable type FilterDefaults is turned into
a dictionary so as to allow for overwriting of the values of q2min, w2min and maxTau.
The dictionary is then turned back into a FilterDefaults object.
"""
from validphys.filters import default_filter_settings_input
if filter_defaults is None:
filter_defaults = {}

if isinstance(filter_defaults, FilterDefaults):
filter_defaults = filter_defaults.to_dict()

if q2min is not None and "q2min" in filter_defaults and q2min != filter_defaults["q2min"]:
raise ConfigError("q2min defined multiple times with different values")

if w2min is not None and "w2min" in filter_defaults and w2min != filter_defaults["w2min"]:
raise ConfigError("w2min defined multiple times with different values")

if (
maxTau is not None
and "maxTau" in filter_defaults
and maxTau != filter_defaults["maxTau"]
):
if maxTau is not None and filter_defaults.get("maxTau", maxTau) != maxTau:
raise ConfigError("maxTau defined multiple times with different values")

if default_filter_settings_recorded_spec_ is not None:
filter_defaults = default_filter_settings_recorded_spec_[default_filter_settings]
filter_defaults = FilterDefaults(**default_filter_settings_recorded_spec_[default_filter_settings])
# If we find recorded specs return immediately and don't read q2min and w2min
# from runcard
return filter_defaults
elif not filter_defaults:
filter_defaults = default_filter_settings_input()
# if filter_defaults have not been set, load the defaults with default_filter_settings_input
filter_defaults = default_filter_settings_input().to_dict()
defaults_loaded = True
else:
defaults_loaded = False
Expand All @@ -1413,7 +1439,9 @@ def produce_defaults(
if maxTau is not None and defaults_loaded:
log.warning("Using maxTau from runcard")
filter_defaults["maxTau"] = maxTau


# Turn the dictionary back into a hashable FilterDefaults object
filter_defaults = FilterDefaults(**filter_defaults)
return filter_defaults

def produce_data(self, data_input, *, group_name="data"):
Expand Down
73 changes: 64 additions & 9 deletions validphys2/src/validphys/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
from importlib.resources import read_text
import logging
import re
import dataclasses
from typing import Union

import numpy as np

from reportengine.checks import check, make_check
from reportengine.compat import yaml
import validphys.cuts
from validphys.process_options import PROCESSES
from validphys.utils import freeze_args, generate_path_filtered_data
from validphys.utils import generate_path_filtered_data

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -99,18 +101,63 @@ class FatalRuleError(Exception):
"""Exception raised when a rule application failed at runtime."""


@dataclasses.dataclass(frozen=True)
class FilterDefaults:
"""
Dataclass carrying default values for filters (cuts) taking into
account the values of ``q2min``, ``w2min`` and ``maxTau``.
"""
q2min: float = None
w2min: float = None
maxTau: float = None

def to_dict(self):
return dataclasses.asdict(self)


@dataclasses.dataclass(frozen=True)
class FilterRule:
"""
Dataclass which carries the filter rule information.
"""
dataset: str = None
process_type: str = None
rule: str = None
reason: str = None
local_variables: Mapping[str, Union[str, float]] = None
PTO: str = None
FNS: str = None
IC: str = None

def to_dict(self):
rule_dict = dataclasses.asdict(self)
filtered_dict = {k: v for k, v in rule_dict.items() if v is not None}
return filtered_dict


@dataclasses.dataclass(frozen=True)
class AddedFilterRule(FilterRule):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of this class?

"""
Dataclass which carries extra filter rule that is added to the
default rule.
"""
pass
Comment thread
scarlehoff marked this conversation as resolved.


def default_filter_settings_input():
"""Return a dictionary with the default hardcoded filter settings.
"""Return a FilterDefaults dataclass with the default hardcoded filter settings.
These are defined in ``defaults.yaml`` in the ``validphys.cuts`` module.
"""
return yaml.safe_load(read_text(validphys.cuts, "defaults.yaml"))
return FilterDefaults(**yaml.safe_load(read_text(validphys.cuts, "defaults.yaml")))


def default_filter_rules_input():
"""Return a dictionary with the input settings.
"""
Return a tuple of FilterRule objects.
These are defined in ``filters.yaml`` in the ``validphys.cuts`` module.
"""
return yaml.safe_load(read_text(validphys.cuts, "filters.yaml"))
list_rules = yaml.safe_load(read_text(validphys.cuts, "filters.yaml"))
return tuple(FilterRule(**rule) for rule in list_rules)


def check_nonnegative(var: str):
Expand Down Expand Up @@ -457,10 +504,19 @@ class Rule:

numpy_functions = {"sqrt": np.sqrt, "log": np.log, "fabs": np.fabs}

def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dict, loader=None):
def __init__(self, initial_data: FilterRule, *, defaults: dict, theory_parameters: dict, loader=None):
self.dataset = None
self.process_type = None
self._local_variables_code = {}

# For compatibility with legacy code that passed a dictionary
if isinstance(initial_data, FilterRule):
initial_data = initial_data.to_dict()
elif isinstance(initial_data, Mapping):
initial_data = dict(initial_data)
else:
raise RuleProcessingError("Expecting initial_data to be an instance of a FilterRule dataclass.")

for key in initial_data:
setattr(self, key, initial_data[key])

Expand Down Expand Up @@ -511,7 +567,7 @@ def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dic
self.rule_string = self.rule
self.defaults = defaults
self.theory_params = theory_parameters
ns = {*self.numpy_functions, *self.defaults, *self.variables, "idat", "central_value"}
ns = {*self.numpy_functions, *self.defaults.to_dict().keys(), *self.variables, "idat", "central_value"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
ns = {*self.numpy_functions, *self.defaults.to_dict().keys(), *self.variables, "idat", "central_value"}
ns = {*self.numpy_functions, *self.defaults.to_dict(), *self.variables, "idat", "central_value"}

Not that it makes a difference though.

for k, v in self.local_variables.items():
try:
self._local_variables_code[k] = lcode = compile(
Expand Down Expand Up @@ -592,7 +648,7 @@ def __call__(self, dataset, idat):
return eval(
self.rule,
self.numpy_functions,
{**{"idat": idat, "central_value": central_value}, **self.defaults, **ns},
{**{"idat": idat, "central_value": central_value}, **self.defaults.to_dict(), **ns},
)
except Exception as e: # pragma: no cover
raise FatalRuleError(f"Error when applying rule {self.rule_string!r}: {e}") from e
Expand Down Expand Up @@ -628,7 +684,6 @@ def _make_point_namespace(self, dataset, idat) -> dict:
return ns


@freeze_args
@functools.lru_cache
def get_cuts_for_dataset(commondata, rules) -> list:
"""Function to generate a list containing the index
Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,10 +675,10 @@ def check_default_filter_rules(self, theoryid, defaults=None):
th_params = theoryid.get_description()
if defaults is None:
defaults = default_filter_settings_input()
return [
return tuple(
Rule(inp, defaults=defaults, theory_parameters=th_params, loader=self)
for inp in default_filter_rules_input()
]
)

def _check_theory_old_or_new(self, theoryid, commondata, cfac):
"""Given a theory and a commondata and a theory load the right fktable
Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/tests/test_filter_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_good_rules():
dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_DW_EM-F2']
for dsname in dsnames:
ds = l.check_dataset(
dsname, cuts='internal', rules=rules, theoryid=THEORYID, variant="legacy"
dsname, cuts='internal', rules=tuple(rules), theoryid=THEORYID, variant="legacy"
)
assert ds.cuts.load() is not None

Expand All @@ -120,7 +120,7 @@ def test_added_rules():
{
"speclabel": "fewer data",
"added_filter_rules": [
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reson": "pt cut"}
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"}
Comment thread
comane marked this conversation as resolved.
],
},
{
Expand Down
31 changes: 0 additions & 31 deletions validphys2/src/validphys/utils.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,11 @@
import contextlib
import functools
import pathlib
import shutil
import tempfile
from typing import Any, Hashable, Mapping, Sequence

from frozendict import frozendict
import numpy as np


def make_hashable(obj: Any):
# So that we don't infinitely recurse since frozenset and tuples
# are Sequences.
if isinstance(obj, Hashable):
return obj
elif isinstance(obj, Mapping):
return frozendict(obj)
elif isinstance(obj, Sequence):
return tuple([make_hashable(i) for i in obj])
else:
raise ValueError("Object is not hashable")


def freeze_args(func):
"""Transform mutable dictionary
Into immutable
Useful to be compatible with cache
"""

@functools.wraps(func)
def wrapped(*args, **kwargs):
args = tuple([make_hashable(arg) for arg in args])
kwargs = {k: make_hashable(v) for k, v in kwargs.items()}
return func(*args, **kwargs)

return wrapped


def generate_path_filtered_data(fit_path, setname):
"""Utility to ensure that both the loader and tools like setupfit utilize the same convention
to generate the names of generated pseudodata"""
Expand Down