Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/sphinx/make_theory_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

from nnpdf_data import theory_cards
from validphys.theorydbutils import fetch_all
from nnpdf_data.theorydbutils import fetch_all

if __name__ == "__main__":
parser = ArgumentParser()
Expand Down
14 changes: 7 additions & 7 deletions doc/sphinx/source/data/data-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ choices. In the code, every effort has been made to keep experimental and
theoretical parameters strictly separate.
In this section we shall specify the layout of the ``nnpdf`` data
directory. It is in this directory that all of the read-only data to be used in
the fit are accessed. The data directory is located in the ``nnpdf`` git
repository, under the path ``validphys/src/validphys2/datafiles``.
the fit are accessed. The data files are located in the ``nnpdf`` git
repository, under the ``nnpdf_data`` package.

Experimental data storage
=========================

The central repository for ``CommonData`` in use by ``nnpdf`` projects is
located in the ``nnpdf`` git repository at

``validphys/src/validphys2/datafiles/commondata``
``nnpdf_data/nnpdf_data/new_commondata``

where a separate ``CommonData`` file is stored for each *Dataset* with the
filename format described in :ref:`dataset-naming-convention`.
Expand All @@ -33,28 +33,28 @@ Theory lookup table
In order to organise the various different theoretical treatments available,
the theory definitions are saved in theory cards located in

``validphys/src/validphys2/datafiles/theory_cards``
``nnpdf_data/nnpdf_data/theory_cards``

in the form of ``yaml`` files. A new theory can be added by simply adding a new
``yaml`` file with the desired theory ID. The definition of the accepted and required parameters
can be found at:

``validphys/src/validphys2/theorydbutils.py``
``nnpdf_data/nnpdf_data/theorydbutils.py``

The following lines will check whether a newly added theory can be read by validphys
(replace 700 with the id of your newly added theory).

.. code-block:: python

from nnpdf_data import theory_cards
from validphys.theorydbutils import fetch_theory
from nnpdf_data.theorydbutils import fetch_theory
theory = fetch_theory(theory_cards, 700)
Comment thread
RoyStegeman marked this conversation as resolved.

A script is provided to
give a brief overview of the various theory options available. It can be found
at

``validphys/src/validphys2/datafiles/disp_theory.py``
``nnpdf_data/nnpdf_data/disp_theory.py``

and should be run without any arguments.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import pandas as pd

from validphys.utils import parse_yaml_inp
from .utils import parse_yaml_inp


@dataclass(frozen=True)
Expand Down Expand Up @@ -90,7 +90,7 @@ def fetch_theory(theory_database: Path, theoryID: int):
Example
------
>>> from nnpdf_data import theory_cards
>>> from validphys.theorydbutils import fetch_theory
>>> from nnpdf_data.theorydbutils import fetch_theory
>>> theory = fetch_theory(theory_cards, 700)
"""
filepath = theory_database / f"{theoryID}.yaml"
Expand All @@ -117,7 +117,7 @@ def fetch_all(theory_database: Path):
Example
------
>>> from nnpdf_data import theory_cards
>>> from validphys.theorydbutils import fetch_all
>>> from nnpdf_data.theorydbutils import fetch_all
>>> theory_df = fetch_all(theory_cards)
"""
theories = []
Expand Down
52 changes: 52 additions & 0 deletions nnpdf_data/nnpdf_data/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pathlib

import ruamel.yaml as yaml
from validobj import ValidationError, parse_input


def parse_yaml_inp(input_yaml, spec):
    """
    Helper function to parse yaml using the `validobj` library and print
    useful error messages in case of a parsing error.

    https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers

    Parameters
    ----------
    input_yaml: str or pathlib.Path
        Path to the yaml file to be parsed.
    spec: type
        Target class (e.g. a dataclass) that ``validobj`` validates the
        parsed mapping against.

    Returns
    -------
    An instance of ``spec`` built from the contents of the yaml file.

    Raises
    ------
    ValidationError
        If the yaml content does not conform to ``spec``. The message is
        annotated with the file name and the line number of every offending
        key or item.
    """
    input_yaml = pathlib.Path(input_yaml)
    # ``round_trip_load`` preserves line/column information (the ``lc``
    # attribute used below to point at the exact offending line).
    # Use a context manager so the file handle is always closed.
    with input_yaml.open("r", encoding="utf-8") as stream:
        inp = yaml.round_trip_load(stream)
    try:
        return parse_input(inp, spec)
    except ValidationError as e:
        current_exc = e
        current_inp = inp
        error_text_lines = []
        # Walk the exception chain, descending into the corresponding part of
        # the input at each step so the reported line numbers stay accurate.
        while current_exc:
            if hasattr(current_exc, 'wrong_field'):
                wrong_field = current_exc.wrong_field
                # Mappings coming from ``round_trip_load`` have an
                # ``lc`` attribute that gives a tuple of
                # ``(line_number, column)`` for a given item in
                # the mapping.
                line = current_inp.lc.item(wrong_field)[0]
                error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:")
                current_inp = current_inp[wrong_field]
            elif hasattr(current_exc, 'wrong_index'):
                wrong_index = current_exc.wrong_index
                # Similarly lists allow to retrieve the line number for
                # a given item.
                line = current_inp.lc.item(wrong_index)[0]
                current_inp = current_inp[wrong_index]
                error_text_lines.append(
                    f"Problem processing list item at line {line} in {input_yaml}:"
                )
            elif hasattr(current_exc, 'unknown'):
                # Report every unknown key, sorted by line number.
                unknown_lines = []
                for u in current_exc.unknown:
                    unknown_lines.append((current_inp.lc.item(u)[0], u))
                unknown_lines.sort()
                for line, key in unknown_lines:
                    error_text_lines.append(
                        f"Unknown key {key!r} defined at line {line} in {input_yaml}:"
                    )
            error_text_lines.append(str(current_exc))
            current_exc = current_exc.__cause__
        raise ValidationError('\n'.join(error_text_lines)) from e
2 changes: 1 addition & 1 deletion validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@
from validobj.custom import Parser

from nnpdf_data import new_to_legacy_map, path_commondata
from nnpdf_data.utils import parse_yaml_inp

# We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml
# of some versions which are pinned in some of the conda packages we use...
from reportengine.compat import yaml
from validphys.coredata import KIN_NAMES, CommonData
from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions
from validphys.process_options import ValidProcess
from validphys.utils import parse_yaml_inp

try:
# If libyaml is available, use the C loader to speed up some of the read
Expand Down
3 changes: 2 additions & 1 deletion validphys2/src/validphys/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Core datastructures used in the validphys data model.
"""

import dataclasses
import enum
import functools
Expand All @@ -12,6 +13,7 @@

import numpy as np

from nnpdf_data.theorydbutils import fetch_theory
from reportengine import namespaces
from reportengine.baseexceptions import AsInputError
from reportengine.compat import yaml
Expand All @@ -24,7 +26,6 @@
from validphys.hyperoptplot import HyperoptTrial
from validphys.lhapdfset import LHAPDFSet
from validphys.tableloader import parse_exp_mat
from validphys.theorydbutils import fetch_theory
from validphys.utils import experiments_to_dataset_inputs

log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/plotoptions/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
import numpy as np
import pandas as pd

from nnpdf_data.utils import parse_yaml_inp
from reportengine.floatformatting import format_number
from reportengine.utils import ChainMap
from validphys.core import CommonDataSpec, DataSetSpec
from validphys.coredata import CommonData
from validphys.plotoptions.plottingoptions import PlottingOptions, default_labels, labeler_functions
from validphys.plotoptions.utils import apply_to_all_columns
from validphys.utils import parse_yaml_inp

log = logging.getLogger(__name__)

Expand Down
14 changes: 5 additions & 9 deletions validphys2/src/validphys/scripts/vp_checktheory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@
import pathlib
import sys

from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase
from reportengine import colors
from reportengine.table import savetable

from validphys.loader import FallbackLoader
from validphys.theorydbutils import TheoryNotFoundInDatabase
from validphys.theoryinfo import theory_info_table

log = logging.getLogger(__name__)
Expand All @@ -56,18 +55,15 @@

def main():
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
'theoryid',
nargs='?',
default=None,
help=(
"Numeric identifier of theory to look up info of"
),
type=int
help=("Numeric identifier of theory to look up info of"),
type=int,
)
group.add_argument(
'--fit',
Expand All @@ -76,7 +72,7 @@ def main():
"Name of a fit from which to parse `theoryid` from, instead of "
"supplying theoryid on command line"
),
default=None
default=None,
)
parser.add_argument(
'--dumptable',
Expand Down
5 changes: 2 additions & 3 deletions validphys2/src/validphys/tests/test_theorydbutils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pytest

from validphys.loader import Loader
from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory
from validphys.api import API
from validphys.theorydbutils import fetch_theory, TheoryNotFoundInDatabase, fetch_all

from validphys.loader import Loader

L = Loader()
DBPATH = L.theorydb_folder
Expand Down
6 changes: 2 additions & 4 deletions validphys2/src/validphys/theoryinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"""
from pandas import DataFrame

from nnpdf_data.theorydbutils import fetch_all, fetch_theory
from reportengine.table import table
from validphys.theorydbutils import fetch_all, fetch_theory


@table
Expand Down Expand Up @@ -60,8 +60,6 @@ def theory_info_table(theory_database, theory_db_id):
"""
res_dict = fetch_theory(theory_database, theory_db_id)
res_df = DataFrame(
list(res_dict.values()),
index=res_dict.keys(),
columns=[f'Info for theory {theory_db_id}'],
list(res_dict.values()), index=res_dict.keys(), columns=[f'Info for theory {theory_db_id}']
)
return res_df
51 changes: 0 additions & 51 deletions validphys2/src/validphys/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

from frozendict import frozendict
import numpy as np
from validobj import ValidationError, parse_input

from reportengine.compat import yaml


def make_hashable(obj: Any):
Expand Down Expand Up @@ -48,54 +45,6 @@ def generate_path_filtered_data(fit_path, setname):
return data_path, unc_path


def parse_yaml_inp(input_yaml, spec):
    """
    Helper function to parse yaml using the `validobj` library and print
    useful error messages in case of a parsing error.

    https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers

    Parameters
    ----------
    input_yaml: str or pathlib.Path
        Path to the yaml file to be parsed.
    spec: type
        Target class (e.g. a dataclass) that ``validobj`` validates the
        parsed mapping against.

    Returns
    -------
    An instance of ``spec`` built from the contents of the yaml file.

    Raises
    ------
    ValidationError
        If the yaml content does not conform to ``spec``. The message is
        annotated with the file name and the line number of every offending
        key or item.
    """
    input_yaml = pathlib.Path(input_yaml)
    # ``round_trip_load`` preserves line/column information (the ``lc``
    # attribute used below to point at the exact offending line).
    # Use a context manager so the file handle is always closed.
    with input_yaml.open("r", encoding="utf-8") as stream:
        inp = yaml.round_trip_load(stream)
    try:
        return parse_input(inp, spec)
    except ValidationError as e:
        current_exc = e
        current_inp = inp
        error_text_lines = []
        # Walk the exception chain, descending into the corresponding part of
        # the input at each step so the reported line numbers stay accurate.
        while current_exc:
            if hasattr(current_exc, 'wrong_field'):
                wrong_field = current_exc.wrong_field
                # Mappings coming from ``round_trip_load`` have an
                # ``lc`` attribute that gives a tuple of
                # ``(line_number, column)`` for a given item in
                # the mapping.
                line = current_inp.lc.item(wrong_field)[0]
                error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:")
                current_inp = current_inp[wrong_field]
            elif hasattr(current_exc, 'wrong_index'):
                wrong_index = current_exc.wrong_index
                # Similarly lists allow to retrieve the line number for
                # a given item.
                line = current_inp.lc.item(wrong_index)[0]
                current_inp = current_inp[wrong_index]
                error_text_lines.append(
                    f"Problem processing list item at line {line} in {input_yaml}:"
                )
            elif hasattr(current_exc, 'unknown'):
                # Report every unknown key, sorted by line number.
                unknown_lines = []
                for u in current_exc.unknown:
                    unknown_lines.append((current_inp.lc.item(u)[0], u))
                unknown_lines.sort()
                for line, key in unknown_lines:
                    error_text_lines.append(
                        f"Unknown key {key!r} defined at line {line} in {input_yaml}:"
                    )
            error_text_lines.append(str(current_exc))
            current_exc = current_exc.__cause__
        raise ValidationError('\n'.join(error_text_lines)) from e


@contextlib.contextmanager
def tempfile_cleaner(root, exit_func, exc, prefix=None, **kwargs):
"""A context manager to handle temporary directory creation and
Expand Down