From 8661864b69dc267bbbc2e047815eae8c6f5dd323 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 22 May 2024 12:14:20 +0200 Subject: [PATCH 1/5] make eko as a separate resource from the theory ; allowing the eko to be contained within the theory --- n3fit/src/evolven3fit/evolve.py | 7 ++- validphys2/src/validphys/loader.py | 53 +++++++++++++++++++ .../src/validphys/nnprofile_default.yaml | 6 ++- validphys2/src/validphys/scripts/vp_list.py | 14 ++--- 4 files changed, 65 insertions(+), 15 deletions(-) diff --git a/n3fit/src/evolven3fit/evolve.py b/n3fit/src/evolven3fit/evolve.py index 05db89c4f4..51bd491abf 100644 --- a/n3fit/src/evolven3fit/evolve.py +++ b/n3fit/src/evolven3fit/evolve.py @@ -10,7 +10,7 @@ import eko from eko import basis_rotation, runner from reportengine.compat import yaml -from validphys.loader import Loader +from validphys.loader import FallbackLoader as Loader from . import eko_utils, utils @@ -63,13 +63,12 @@ def evolve_fit( stdout_log = logging.StreamHandler(sys.stdout) for log in [log_file, stdout_log]: log.setFormatter(LOGGING_SETTINGS["formatter"]) - + # The log file will get everything log_file.setLevel(LOGGING_SETTINGS["level"]) # While the terminal only up to info stdout_log.setLevel(logging.INFO) - for logger in (_logger, *[logging.getLogger("eko")]): logger.handlers = [] logger.setLevel(LOGGING_SETTINGS["level"]) @@ -87,7 +86,7 @@ def evolve_fit( else: try: _logger.info(f"Loading eko from theory {theoryID}") - eko_path = (Loader().check_theoryID(theoryID).path) / "eko.tar" + eko_path = Loader().check_eko(theoryID) except FileNotFoundError: _logger.warning(f"eko not found in theory {theoryID}, we will construct it") theory, op = eko_utils.construct_eko_cards( diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index 88017eed68..a75d05143d 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -78,6 +78,10 @@ class TheoryNotFound(LoadFailedError): pass +class EkoNotFound(LoadFailedError): + pass + + class TheoryMetadataNotFound(LoadFailedError): pass @@ -301,6 +305,14 @@ def available_theories(self): for folder in self._theories_path.glob(theory_token + '*') } + @property + @functools.lru_cache() + def available_ekos(self): + """Return a string token for each of the available theories""" + return { + eko_path.parent.name.split("_")[1] for eko_path in self._theories_path.glob("*/eko.tar") + } + @property @functools.lru_cache() def _available_old_datasets(self): @@ -510,6 +522,15 @@ def check_theoryID(self, theoryID): ) return TheoryIDSpec(theoryID, theopath, self.theorydb_folder) + @functools.lru_cache() + def check_eko(self, theoryID): + """Check the eko (and the parent theory) both exists and returns the path to it""" + theory = self.check_theoryID(theoryID) + eko_path = theory.path / "eko.tar" + if not eko_path.exists(): + raise EkoNotFound(f"Could not find eko {eko_path} in theory: {theoryID}") + return eko_path + @property def theorydb_folder(self): """Checks theory db file exists and returns path to it""" @@ -1026,6 +1047,16 @@ def theory_urls(self): def theory_index(self): return self.nnprofile['theory_index'] + @property + @_key_or_loader_error + def eko_index(self): + return self.nnprofile['eko_index'] + + @property + @_key_or_loader_error + def eko_urls(self): + return self.nnprofile['eko_urls'] + @property @_key_or_loader_error def nnpdf_pdfs_urls(self): @@ -1091,6 +1122,13 @@ def remote_theories(self): rt = self.remote_files(self.theory_urls, self.theory_index, thing="theories") return {k[len(token) :]: v for k, v in rt.items()} + @property + @functools.lru_cache() + def remote_ekos(self): + token = 'eko_' + rt = self.remote_files(self.eko_urls, self.eko_index, thing="ekos") + return {k[len(token) :]: v for k, v in rt.items()} + @property @functools.lru_cache() def remote_nnpdf_pdfs(self): @@ -1121,6 +1159,10 @@ def downloadable_hyperscans(self): def downloadable_theories(self): return list(self.remote_theories) + @property + def downloadable_ekos(self): + return list(self.remote_ekos) + @property def lhapdf_pdfs(self): return lhaindex.expand_index_names('*') @@ -1293,6 +1335,17 @@ def download_theoryID(self, thid): raise TheoryNotFound("Theory %s not available." % thid) download_and_extract(remote[thid], self._theories_path, target_name=f"theory_{thid}") + def download_eko(self, thid): + """Download the EKO for a given theory ID""" + thid = str(thid) + remote = self.remote_ekos + if thid not in remote: + raise EkoNotFound(f"EKO for TheoryID {thid} is not available in the remote server") + # Check that we have the theory we need + theory = self.check_theoryID(thid) + target_path = theory.path / "eko.tar" + download_file(remote[thid], target_path) + def download_vp_output_file(self, filename, **kwargs): try: root_url = self.nnprofile['reports_root_url'] diff --git a/validphys2/src/validphys/nnprofile_default.yaml b/validphys2/src/validphys/nnprofile_default.yaml index 058273497b..e1c6c7414d 100644 --- a/validphys2/src/validphys/nnprofile_default.yaml +++ b/validphys2/src/validphys/nnprofile_default.yaml @@ -3,7 +3,7 @@ # The location of a custom profile can be given with the `NNPDF_PROFILE_PATH` enviroment variable # otherwise by default ${XDG_CONFIG_HOME}/.config/NNPDF/nnprofile.yaml will be read # which in most systems defaults to `~/.config/NNPDF/nnprofile.yaml` -# +# # # The following defines where NNPDF resources will be stored # The directories for results / theories / hyperscan / validphys are declared as @@ -50,6 +50,10 @@ theory_urls: theory_index: 'theorydata.json' +eko_urls: + - 'https://nnpdf.web.cern.ch/nnpdf/ekos/' +eko_index: 'ekodata.json' + lhapdf_urls: - 'http://lhapdfsets.web.cern.ch/lhapdfsets/current/' nnpdf_pdfs_urls: diff --git a/validphys2/src/validphys/scripts/vp_list.py b/validphys2/src/validphys/scripts/vp_list.py index ae0340005b..5653053c0e 100644 --- a/validphys2/src/validphys/scripts/vp_list.py +++ b/validphys2/src/validphys/scripts/vp_list.py @@ -4,6 +4,7 @@ Script which lists available resources locally and remotely """ + import argparse import fnmatch from functools import partial @@ -11,7 +12,6 @@ import re from reportengine import colors - from validphys.loader import FallbackLoader as L log = logging.getLogger() @@ -59,11 +59,9 @@ def main(command_line=None): attrs = dir(L) - available = [ - attr.lstrip(LOCAL_TOKEN) for attr in attrs if attr.startswith(LOCAL_TOKEN) - ] + available = [attr.removeprefix(LOCAL_TOKEN) for attr in attrs if attr.startswith(LOCAL_TOKEN)] downloadable = [ - attr.lstrip(REMOTE_TOKEN) for attr in attrs if attr.startswith(REMOTE_TOKEN) + attr.removeprefix(REMOTE_TOKEN) for attr in attrs if attr.startswith(REMOTE_TOKEN) ] # set metavar and print choices in help string - otherwise looks ugly. parser.add_argument( @@ -110,13 +108,9 @@ def main(command_line=None): "--regex", type=str, default=None, - help=( - "Filter search using regular expression, only list resources which " - "match pattern." - ), + help="Filter search using regular expression, only list resources which match pattern.", ) - args = parser.parse_args(command_line) results_filter = _get_filter(glob_pattern=args.glob, re_pattern=args.regex) # sane ordering is quite expensive and only really required with theories. From 4aa613cb7f0fcfcf2c8e62a21431f6baabefb04b Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 23 May 2024 16:51:15 +0200 Subject: [PATCH 2/5] add no-net option to eko --- n3fit/src/evolven3fit/cli.py | 2 +- n3fit/src/evolven3fit/evolve.py | 15 +++++++------ n3fit/src/n3fit/scripts/evolven3fit.py | 29 ++++++++++++++++++++------ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/n3fit/src/evolven3fit/cli.py b/n3fit/src/evolven3fit/cli.py index 1996e0513c..3943ba6319 100644 --- a/n3fit/src/evolven3fit/cli.py +++ b/n3fit/src/evolven3fit/cli.py @@ -2,7 +2,7 @@ def cli_evolven3fit( - configuration_folder, q_fin, q_points, op_card_info, theory_card_info, dump, load, force + configuration_folder, q_fin, q_points, op_card_info, theory_card_info, force, load, dump ): """Evolves the fitted PDFs. diff --git a/n3fit/src/evolven3fit/evolve.py b/n3fit/src/evolven3fit/evolve.py index 51bd491abf..087d3fcd70 100644 --- a/n3fit/src/evolven3fit/evolve.py +++ b/n3fit/src/evolven3fit/evolve.py @@ -10,7 +10,6 @@ import eko from eko import basis_rotation, runner from reportengine.compat import yaml -from validphys.loader import FallbackLoader as Loader from . import eko_utils, utils @@ -25,7 +24,7 @@ def evolve_fit( - fit_folder, q_fin, q_points, op_card_dict, theory_card_dict, force, eko_path=None, dump_eko=None + fit_folder, q_fin, q_points, op_card_dict, theory_card_dict, force, eko_path, dump_eko=None ): """ Evolves all the fitted replica in fit_folder/nnfit @@ -83,17 +82,17 @@ def evolve_fit( if eko_path is not None: eko_path = pathlib.Path(eko_path) _logger.info(f"Loading eko from : {eko_path}") - else: - try: - _logger.info(f"Loading eko from theory {theoryID}") - eko_path = Loader().check_eko(theoryID) - except FileNotFoundError: - _logger.warning(f"eko not found in theory {theoryID}, we will construct it") + + if eko_path is None or not eko_path.exists(): + if dump_eko is not None: + _logger.warning(f"Trying to construct the eko at {dump_eko}") theory, op = eko_utils.construct_eko_cards( theoryID, q_fin, q_points, x_grid, op_card_dict, theory_card_dict ) runner.solve(theory, op, dump_eko) eko_path = dump_eko + else: + raise ValueError(f"dump_eko not provided and {eko_path=} not found") with eko.EKO.edit(eko_path) as eko_op: x_grid_obj = eko.interpolation.XGrid(x_grid) diff --git a/n3fit/src/n3fit/scripts/evolven3fit.py b/n3fit/src/n3fit/scripts/evolven3fit.py index 7d826182c4..4441f0ddd3 100644 --- a/n3fit/src/n3fit/scripts/evolven3fit.py +++ b/n3fit/src/n3fit/scripts/evolven3fit.py @@ -7,12 +7,12 @@ import pathlib import sys -from evolven3fit import cli, eko_utils, evolve +from evolven3fit import cli, eko_utils, evolve, utils import numpy as np from eko.runner.managed import solve from n3fit.io.writer import XGRID -from validphys.loader import FallbackLoader +from validphys.loader import FallbackLoader, Loader _logger = logging.getLogger(__name__) @@ -115,6 +115,7 @@ def main(): "-p", "--q-points", type=int, default=None, help="Number of q points for the evolution" ) parser.add_argument("-n", "--n-cores", type=int, default=1, help="Number of cores to be used") + parser.add_argument("--no-net", action="store_true", help="Emulates validphys' --no-net") parser.add_argument( "-e", "--ev-op-iterations", @@ -146,21 +147,37 @@ def main(): if args.use_fhmruvv: theory_card_info["use_fhmruvv"] = args.use_fhmruvv + if args.no_net: + loader = Loader() + else: + loader = FallbackLoader() + if args.actions == "evolve": + + if args.load is None: + fit_folder = pathlib.Path(args.configuration_folder) + _logger.info(f"Loading theory {theoryID}") + theoryID = utils.get_theoryID_from_runcard(fit_folder) + + _logger.info(f"Loading eko from theory {theoryID}") + eko_path = loader.check_eko(theoryID) + else: + eko_path = args.load + cli.cli_evolven3fit( - args.configuration_folder, + fit_folder, args.q_fin, args.q_points, op_card_info, theory_card_info, - args.dump, - args.load, args.force, + eko_path, + None, ) else: # If we are in the business of producing an eko, do some checks before starting: # 1. load the nnpdf theory early to check for inconsistent options and theory problems - nnpdf_theory = FallbackLoader().check_theoryID(args.theoryID).get_description() + nnpdf_theory = loader.check_theoryID(args.theoryID).get_description() if nnpdf_theory.get("ModEv") == "TRN" and args.ev_op_iterations is not None: raise ValueError("ev_op_iterations is not accepted with ModEv=TRN solution") From 7ff48eb436e9ddbc7f6c8e22b99a6ca4ac0284f6 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 23 May 2024 17:23:49 +0200 Subject: [PATCH 3/5] add a evolven3fit check action to look for the eko at setupfit time --- n3fit/src/n3fit/checks.py | 12 ++++++++++++ n3fit/src/n3fit/n3fit_checks_provider.py | 5 +++++ n3fit/src/n3fit/scripts/vp_setupfit.py | 9 ++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 607d838e55..3ff404ffe5 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -9,6 +9,7 @@ from n3fit.hyper_optimization import penalties as penalties_module from n3fit.hyper_optimization.rewards import IMPLEMENTED_LOSSES, IMPLEMENTED_STATS from reportengine.checks import CheckError, make_argcheck +from validphys.loader import FallbackLoader from validphys.pdfbases import check_basis log = logging.getLogger(__name__) @@ -485,3 +486,14 @@ def check_polarized_configs(fitting, fitbasis, positivity_bound): ) if fitting.get("sum_rules", True) and fitting.get("sum_rules") != "TSR": raise CheckError("The 'sum_rules' key needs to be 'TSR' for polarised PDF fits.") + + +@make_argcheck +def check_eko_exists(theoryid): + """Check that an eko for this theory exists. + Since there might still be theories without an associated eko, + this function raises a logger' error instead of an Exception.""" + try: + _ = FallbackLoader().check_eko(theoryid.id) + except FileNotFoundError: + log.error(f"No eko found for {theoryid}") diff --git a/n3fit/src/n3fit/n3fit_checks_provider.py b/n3fit/src/n3fit/n3fit_checks_provider.py index 79a19bdb05..a06b3f0efd 100644 --- a/n3fit/src/n3fit/n3fit_checks_provider.py +++ b/n3fit/src/n3fit/n3fit_checks_provider.py @@ -32,3 +32,8 @@ def n3fit_checks_action( double_precision=False, ): return + + +@n3fit.checks.check_eko_exists +def evolven3fit_checks_action(theoryid): + return diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 3fe9ae8b47..c56b716fce 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -38,7 +38,13 @@ from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ -SETUPFIT_FIXED_CONFIG = dict(actions_=['datacuts check_t0pdfset', 'theory check_positivity']) +SETUPFIT_FIXED_CONFIG = dict( + actions_=[ + 'datacuts check_t0pdfset', + 'theory check_positivity', + 'theory evolven3fit_checks_action', + ] +) SETUPFIT_PROVIDERS = [ 'n3fit.n3fit_checks_provider', @@ -159,6 +165,7 @@ def from_yaml(cls, o, *args, **kwargs): SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) + file_content.update(SETUPFIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) From de5e37546a21bb2d3a31bc04a3d5bf4a27e3207d Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 23 May 2024 21:26:04 +0200 Subject: [PATCH 4/5] fix log --- n3fit/src/n3fit/scripts/evolven3fit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/n3fit/src/n3fit/scripts/evolven3fit.py b/n3fit/src/n3fit/scripts/evolven3fit.py index 4441f0ddd3..a304e188ec 100644 --- a/n3fit/src/n3fit/scripts/evolven3fit.py +++ b/n3fit/src/n3fit/scripts/evolven3fit.py @@ -156,7 +156,6 @@ def main(): if args.load is None: fit_folder = pathlib.Path(args.configuration_folder) - _logger.info(f"Loading theory {theoryID}") theoryID = utils.get_theoryID_from_runcard(fit_folder) _logger.info(f"Loading eko from theory {theoryID}") From 8cd667d7374f8513854c6a14224a6b4d61d69141 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 28 May 2024 11:26:18 +0200 Subject: [PATCH 5/5] remove on failure --- validphys2/src/validphys/loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index a75d05143d..4e8e54b429 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -921,7 +921,7 @@ def _download_and_show(response, stream): sys.stdout.write('\n') -def download_file(url, stream_or_path, make_parents=False): +def download_file(url, stream_or_path, make_parents=False, delete_on_failure=False): """Download a file and show a progress bar if the INFO log level is enabled. If ``make_parents`` is ``True`` ``stream_or_path`` is path-like, all the parent folders will @@ -950,7 +950,7 @@ def download_file(url, stream_or_path, make_parents=False): p.parent.mkdir(exist_ok=True, parents=True) download_target = tempfile.NamedTemporaryFile( - delete=False, dir=p.parent, prefix=p.name, suffix='.part' + delete=delete_on_failure, dir=p.parent, prefix=p.name, suffix='.part' ) with download_target as f: @@ -1344,7 +1344,7 @@ def download_eko(self, thid): # Check that we have the theory we need theory = self.check_theoryID(thid) target_path = theory.path / "eko.tar" - download_file(remote[thid], target_path) + download_file(remote[thid], target_path, delete_on_failure=True) def download_vp_output_file(self, filename, **kwargs): try: