diff --git a/doc/sphinx/make_theory_csv.py b/doc/sphinx/make_theory_csv.py index 2c1cc29ff8..c87a256621 100644 --- a/doc/sphinx/make_theory_csv.py +++ b/doc/sphinx/make_theory_csv.py @@ -6,7 +6,7 @@ from pathlib import Path from nnpdf_data import theory_cards -from validphys.theorydbutils import fetch_all +from nnpdf_data.theorydbutils import fetch_all if __name__ == "__main__": parser = ArgumentParser() diff --git a/doc/sphinx/source/data/data-config.rst b/doc/sphinx/source/data/data-config.rst index 77954826ee..ae44d3a6fa 100644 --- a/doc/sphinx/source/data/data-config.rst +++ b/doc/sphinx/source/data/data-config.rst @@ -10,8 +10,8 @@ choices. In the code, every effort has been made to keep experimental and theoretical parameters strictly separate. In this section we shall specify the layout of the ``nnpdf`` data directory. It is in this directory that all of the read-only data to be used in -the fit are accessed. The data directory is located in the ``nnpdf`` git -repository, under the path ``validphys/src/validphys2/datafiles``. +the fit are accessed. The data files are located in the ``nnpdf`` git +repository, under the ``nnpdf_data`` package. Experimental data storage ========================= @@ -19,7 +19,7 @@ Experimental data storage The central repository for ``CommonData`` in use by ``nnpdf`` projects is located in the ``nnpdf`` git repository at - ``validphys/src/validphys2/datafiles/commondata`` + ``nnpdf_data/nnpdf_data/new_commondata`` where a separate ``CommonData`` file is stored for each *Dataset* with the filename format described in :ref:`dataset-naming-convention`. @@ -33,13 +33,13 @@ Theory lookup table In order to organise the various different theoretical treatments available, the theory definitions are saved in theory cards located in - ``validphys/src/validphys2/datafiles/theory_cards`` + ``nnpdf_data/nnpdf_data/theory_cards`` in the form of ``yaml`` files. 
A new theory can be added by simply adding a new ``yaml`` file with the desired theory ID. The definition of the accepted and required parameters can be found at: - ``validphys/src/validphys2/theorydbutils.py`` + ``nnpdf_data/nnpdf_data/theorydbutils.py`` The following lines will check whether a newly added theory can be read by validphys (change 700 by the id of your newly added theory). @@ -47,14 +47,14 @@ The following lines will check whether a newly added theory can be read by valid .. code-block:: python from nnpdf_data import theory_cards - from validphys.theorydbutils import fetch_theory + from nnpdf_data.theorydbutils import fetch_theory theory = fetch_theory(theory_cards, 700) A script is provided to give a brief overview of the various theory options available. It can be found at - ``validphys/src/validphys2/datafiles/disp_theory.py`` + ``nnpdf_data/nnpdf_data/disp_theory.py`` and should be run without any arguments. diff --git a/validphys2/src/validphys/theorydbutils.py b/nnpdf_data/nnpdf_data/theorydbutils.py similarity index 94% rename from validphys2/src/validphys/theorydbutils.py rename to nnpdf_data/nnpdf_data/theorydbutils.py index abe3cad4d0..defc35577f 100644 --- a/validphys2/src/validphys/theorydbutils.py +++ b/nnpdf_data/nnpdf_data/theorydbutils.py @@ -11,7 +11,7 @@ import pandas as pd -from validphys.utils import parse_yaml_inp +from .utils import parse_yaml_inp @dataclass(frozen=True) @@ -90,7 +90,7 @@ def fetch_theory(theory_database: Path, theoryID: int): Example ------ >>> from nnpdf_data import theory_cards - >>> from validphys.theorydbutils import fetch_theory + >>> from nnpdf_data.theorydbutils import fetch_theory >>> theory = fetch_theory(theory_cards, 700) """ filepath = theory_database / f"{theoryID}.yaml" @@ -117,7 +117,7 @@ def fetch_all(theory_database: Path): Example ------ >>> from validphys.datafiles import theory_cards - >>> from validphys.theorydbutils import fetch_all + >>> from nnpdf_data.theorydbutils import fetch_all >>> 
theory_df = fetch_all(theory_cards) """ theories = [] diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py new file mode 100644 index 0000000000..2fe7c15c4c --- /dev/null +++ b/nnpdf_data/nnpdf_data/utils.py @@ -0,0 +1,52 @@ +import pathlib + +import ruamel.yaml as yaml +from validobj import ValidationError, parse_input + + +def parse_yaml_inp(input_yaml, spec): + """ + Helper function to parse yaml using the `validobj` library and print + useful error messages in case of a parsing error. + + https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers + """ + input_yaml = pathlib.Path(input_yaml) + inp = yaml.round_trip_load(input_yaml.open("r", encoding="utf-8")) + try: + return parse_input(inp, spec) + except ValidationError as e: + current_exc = e + current_inp = inp + error_text_lines = [] + while current_exc: + if hasattr(current_exc, 'wrong_field'): + wrong_field = current_exc.wrong_field + # Mappings coming from ``round_trip_load`` have an + # ``lc`` attribute that gives a tuple of + # ``(line_number, column)`` for a given item in + # the mapping. + line = current_inp.lc.item(wrong_field)[0] + error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:") + current_inp = current_inp[wrong_field] + elif hasattr(current_exc, 'wrong_index'): + wrong_index = current_exc.wrong_index + # Similarly lists allow to retrieve the line number for + # a given item. 
+ line = current_inp.lc.item(wrong_index)[0] + current_inp = current_inp[wrong_index] + error_text_lines.append( + f"Problem processing list item at line {line} in {input_yaml}:" + ) + elif hasattr(current_exc, 'unknown'): + unknown_lines = [] + for u in current_exc.unknown: + unknown_lines.append((current_inp.lc.item(u)[0], u)) + unknown_lines.sort() + for line, key in unknown_lines: + error_text_lines.append( + f"Unknown key {key!r} defined at line {line} in {input_yaml}:" + ) + error_text_lines.append(str(current_exc)) + current_exc = current_exc.__cause__ + raise ValidationError('\n'.join(error_text_lines)) from e diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index c6a89a2363..bb2bc37184 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -49,6 +49,7 @@ from validobj.custom import Parser from nnpdf_data import new_to_legacy_map, path_commondata +from nnpdf_data.utils import parse_yaml_inp # We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml # of some versions which are pinned in some of the conda packages we use... @@ -56,7 +57,6 @@ from validphys.coredata import KIN_NAMES, CommonData from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions from validphys.process_options import ValidProcess -from validphys.utils import parse_yaml_inp try: # If libyaml is available, use the C loader to speed up some of the read diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 3d85c321bb..ecc3dc2f98 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -1,6 +1,7 @@ """ Core datastructures used in the validphys data model. 
""" + import dataclasses import enum import functools @@ -12,6 +13,7 @@ import numpy as np +from nnpdf_data.theorydbutils import fetch_theory from reportengine import namespaces from reportengine.baseexceptions import AsInputError from reportengine.compat import yaml @@ -24,7 +26,6 @@ from validphys.hyperoptplot import HyperoptTrial from validphys.lhapdfset import LHAPDFSet from validphys.tableloader import parse_exp_mat -from validphys.theorydbutils import fetch_theory from validphys.utils import experiments_to_dataset_inputs log = logging.getLogger(__name__) diff --git a/validphys2/src/validphys/plotoptions/core.py b/validphys2/src/validphys/plotoptions/core.py index 8b3a6deaeb..b787521dcc 100644 --- a/validphys2/src/validphys/plotoptions/core.py +++ b/validphys2/src/validphys/plotoptions/core.py @@ -7,13 +7,13 @@ import numpy as np import pandas as pd +from nnpdf_data.utils import parse_yaml_inp from reportengine.floatformatting import format_number from reportengine.utils import ChainMap from validphys.core import CommonDataSpec, DataSetSpec from validphys.coredata import CommonData from validphys.plotoptions.plottingoptions import PlottingOptions, default_labels, labeler_functions from validphys.plotoptions.utils import apply_to_all_columns -from validphys.utils import parse_yaml_inp log = logging.getLogger(__name__) diff --git a/validphys2/src/validphys/scripts/vp_checktheory.py b/validphys2/src/validphys/scripts/vp_checktheory.py index af8b7674a2..403741526f 100755 --- a/validphys2/src/validphys/scripts/vp_checktheory.py +++ b/validphys2/src/validphys/scripts/vp_checktheory.py @@ -34,11 +34,10 @@ import pathlib import sys +from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase from reportengine import colors from reportengine.table import savetable - from validphys.loader import FallbackLoader -from validphys.theorydbutils import TheoryNotFoundInDatabase from validphys.theoryinfo import theory_info_table log = logging.getLogger(__name__) @@ -56,18 
+55,15 @@ def main(): parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) group = parser.add_mutually_exclusive_group(required=True) group.add_argument( 'theoryid', nargs='?', default=None, - help=( - "Numeric identifier of theory to look up info of" - ), - type=int + help=("Numeric identifier of theory to look up info of"), + type=int, ) group.add_argument( '--fit', @@ -76,7 +72,7 @@ def main(): "Name of a fit from which to parse `theoryid` from, instead of " "supplying theoryid on command line" ), - default=None + default=None, ) parser.add_argument( '--dumptable', diff --git a/validphys2/src/validphys/tests/test_theorydbutils.py b/validphys2/src/validphys/tests/test_theorydbutils.py index 68df55e29f..c812508445 100644 --- a/validphys2/src/validphys/tests/test_theorydbutils.py +++ b/validphys2/src/validphys/tests/test_theorydbutils.py @@ -1,9 +1,8 @@ import pytest -from validphys.loader import Loader +from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory from validphys.api import API -from validphys.theorydbutils import fetch_theory, TheoryNotFoundInDatabase, fetch_all - +from validphys.loader import Loader L = Loader() DBPATH = L.theorydb_folder diff --git a/validphys2/src/validphys/theoryinfo.py b/validphys2/src/validphys/theoryinfo.py index c7ac8df380..3af9ead5b7 100644 --- a/validphys2/src/validphys/theoryinfo.py +++ b/validphys2/src/validphys/theoryinfo.py @@ -6,8 +6,8 @@ """ from pandas import DataFrame +from nnpdf_data.theorydbutils import fetch_all, fetch_theory from reportengine.table import table -from validphys.theorydbutils import fetch_all, fetch_theory @table @@ -60,8 +60,6 @@ def theory_info_table(theory_database, theory_db_id): """ res_dict = fetch_theory(theory_database, theory_db_id) res_df = DataFrame( - list(res_dict.values()), - index=res_dict.keys(), - columns=[f'Info for 
theory {theory_db_id}'], + list(res_dict.values()), index=res_dict.keys(), columns=[f'Info for theory {theory_db_id}'] ) return res_df diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py index af259130e0..64e0e6d5de 100644 --- a/validphys2/src/validphys/utils.py +++ b/validphys2/src/validphys/utils.py @@ -7,9 +7,6 @@ from frozendict import frozendict import numpy as np -from validobj import ValidationError, parse_input - -from reportengine.compat import yaml def make_hashable(obj: Any): @@ -48,54 +45,6 @@ def generate_path_filtered_data(fit_path, setname): return data_path, unc_path -def parse_yaml_inp(input_yaml, spec): - """ - Helper function to parse yaml using the `validobj` library and print - useful error messages in case of a parsing error. - - https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers - """ - input_yaml = pathlib.Path(input_yaml) - inp = yaml.round_trip_load(input_yaml.open("r", encoding="utf-8")) - try: - return parse_input(inp, spec) - except ValidationError as e: - current_exc = e - current_inp = inp - error_text_lines = [] - while current_exc: - if hasattr(current_exc, 'wrong_field'): - wrong_field = current_exc.wrong_field - # Mappings compping from ``round_trip_load`` have an - # ``lc`` attribute that gives a tuple of - # ``(line_number, column)`` for a given item in - # the mapping. - line = current_inp.lc.item(wrong_field)[0] - error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:") - current_inp = current_inp[wrong_field] - elif hasattr(current_exc, 'wrong_index'): - wrong_index = current_exc.wrong_index - # Similarly lists allow to retrieve the line number for - # a given item. 
- line = current_inp.lc.item(wrong_index)[0] - current_inp = current_inp[wrong_index] - error_text_lines.append( - f"Problem processing list item at line {line} in {input_yaml}:" - ) - elif hasattr(current_exc, 'unknown'): - unknown_lines = [] - for u in current_exc.unknown: - unknown_lines.append((current_inp.lc.item(u)[0], u)) - unknown_lines.sort() - for line, key in unknown_lines: - error_text_lines.append( - f"Unknown key {key!r} defined at line {line} in {input_yaml}:" - ) - error_text_lines.append(str(current_exc)) - current_exc = current_exc.__cause__ - raise ValidationError('\n'.join(error_text_lines)) from e - - @contextlib.contextmanager def tempfile_cleaner(root, exit_func, exc, prefix=None, **kwargs): """A context manager to handle temporary directory creation and