Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/sphinx/make_theory_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

from nnpdf_data import theory_cards
from validphys.theorydbutils import fetch_all
from nnpdf_data.theorydbutils import fetch_all

if __name__ == "__main__":
parser = ArgumentParser()
Expand Down
14 changes: 7 additions & 7 deletions doc/sphinx/source/data/data-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ choices. In the code, every effort has been made to keep experimental and
theoretical parameters strictly separate.
In this section we shall specify the layout of the ``nnpdf`` data
directory. It is in this directory that all of the read-only data to be used in
the fit are accessed. The data directory is located in the ``nnpdf`` git
repository, under the path ``validphys/src/validphys2/datafiles``.
the fit are accessed. The data files are located in the ``nnpdf`` git
repository, under the ``nnpdf_data`` package.

Experimental data storage
=========================

The central repository for ``CommonData`` in use by ``nnpdf`` projects is
located in the ``nnpdf`` git repository at

``validphys/src/validphys2/datafiles/commondata``
``nnpdf_data/nnpdf_data/new_commondata``

where a separate ``CommonData`` file is stored for each *Dataset* with the
filename format described in :ref:`dataset-naming-convention`.
Expand All @@ -33,28 +33,28 @@ Theory lookup table
In order to organise the various different theoretical treatments available,
the theory definitions are saved in theory cards located in

``validphys/src/validphys2/datafiles/theory_cards``
``nnpdf_data/nnpdf_data/theory_cards``

in the form of ``yaml`` files. A new theory can be added by simply adding a new
``yaml`` file with the desired theory ID. The definition of the accepted and required parameters
can be found at:

``validphys/src/validphys2/theorydbutils.py``
``nnpdf_data/nnpdf_data/theorydbutils.py``

The following lines will check whether a newly added theory can be read by validphys
(replace 700 with the id of your newly added theory).

.. code-block:: python

from nnpdf_data import theory_cards
from validphys.theorydbutils import fetch_theory
from nnpdf_data.theorydbutils import fetch_theory
theory = fetch_theory(theory_cards, 700)
Comment thread
RoyStegeman marked this conversation as resolved.

A script is provided to
give a brief overview of the various theory options available. It can be found
at

``validphys/src/validphys2/datafiles/disp_theory.py``
``nnpdf_data/nnpdf_data/disp_theory.py``

and should be run without any arguments.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import pandas as pd

from validphys.utils import parse_yaml_inp
from .utils import parse_yaml_inp


@dataclass(frozen=True)
Expand Down Expand Up @@ -90,7 +90,7 @@ def fetch_theory(theory_database: Path, theoryID: int):
Example
------
>>> from nnpdf_data import theory_cards
>>> from validphys.theorydbutils import fetch_theory
>>> from nnpdf_data.theorydbutils import fetch_theory
>>> theory = fetch_theory(theory_cards, 700)
"""
filepath = theory_database / f"{theoryID}.yaml"
Expand All @@ -117,7 +117,7 @@ def fetch_all(theory_database: Path):
Example
------
>>> from nnpdf_data import theory_cards
>>> from validphys.theorydbutils import fetch_all
>>> from nnpdf_data.theorydbutils import fetch_all
>>> theory_df = fetch_all(theory_cards)
"""
theories = []
Expand Down
52 changes: 52 additions & 0 deletions nnpdf_data/nnpdf_data/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pathlib

import ruamel.yaml as yaml
from validobj import ValidationError, parse_input


def parse_yaml_inp(input_yaml, spec):
    """
    Helper function to parse yaml using the `validobj` library and print
    useful error messages in case of a parsing error.

    https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers

    Parameters
    ----------
    input_yaml: str or pathlib.Path
        Path to the yaml file to be parsed.
    spec: type
        Target class (e.g. a dataclass) that ``validobj`` validates the
        parsed mapping against.

    Returns
    -------
    An instance of ``spec`` built from the contents of the yaml file.

    Raises
    ------
    ValidationError
        If the yaml content does not conform to ``spec``. The message is
        annotated with the file name and the line number of every offending
        key or item.
    """
    input_yaml = pathlib.Path(input_yaml)
    # ``round_trip_load`` preserves line/column information (the ``lc``
    # attribute used below to point at the exact offending line).
    # Use a context manager so the file handle is always closed.
    with input_yaml.open("r", encoding="utf-8") as stream:
        inp = yaml.round_trip_load(stream)
    try:
        return parse_input(inp, spec)
    except ValidationError as e:
        current_exc = e
        current_inp = inp
        error_text_lines = []
        # Walk the exception chain, descending into the corresponding part of
        # the input at each step so the reported line numbers stay accurate.
        while current_exc:
            if hasattr(current_exc, 'wrong_field'):
                wrong_field = current_exc.wrong_field
                # Mappings coming from ``round_trip_load`` have an
                # ``lc`` attribute that gives a tuple of
                # ``(line_number, column)`` for a given item in
                # the mapping.
                line = current_inp.lc.item(wrong_field)[0]
                error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:")
                current_inp = current_inp[wrong_field]
            elif hasattr(current_exc, 'wrong_index'):
                wrong_index = current_exc.wrong_index
                # Similarly lists allow to retrieve the line number for
                # a given item.
                line = current_inp.lc.item(wrong_index)[0]
                current_inp = current_inp[wrong_index]
                error_text_lines.append(
                    f"Problem processing list item at line {line} in {input_yaml}:"
                )
            elif hasattr(current_exc, 'unknown'):
                # Report every unknown key, sorted by line number.
                unknown_lines = []
                for u in current_exc.unknown:
                    unknown_lines.append((current_inp.lc.item(u)[0], u))
                unknown_lines.sort()
                for line, key in unknown_lines:
                    error_text_lines.append(
                        f"Unknown key {key!r} defined at line {line} in {input_yaml}:"
                    )
            error_text_lines.append(str(current_exc))
            current_exc = current_exc.__cause__
        raise ValidationError('\n'.join(error_text_lines)) from e
2 changes: 1 addition & 1 deletion validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@
from validobj.custom import Parser

from nnpdf_data import new_to_legacy_map, path_commondata
from nnpdf_data.utils import parse_yaml_inp

# We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml
# of some versions which are pinned in some of the conda packages we use...
from reportengine.compat import yaml
from validphys.coredata import KIN_NAMES, CommonData
from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions
from validphys.process_options import ValidProcess
from validphys.utils import parse_yaml_inp

try:
# If libyaml is available, use the C loader to speed up some of the read
Expand Down
3 changes: 2 additions & 1 deletion validphys2/src/validphys/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Core datastructures used in the validphys data model.
"""

import dataclasses
import enum
import functools
Expand All @@ -12,6 +13,7 @@

import numpy as np

from nnpdf_data.theorydbutils import fetch_theory
from reportengine import namespaces
from reportengine.baseexceptions import AsInputError
from reportengine.compat import yaml
Expand All @@ -24,7 +26,6 @@
from validphys.hyperoptplot import HyperoptTrial
from validphys.lhapdfset import LHAPDFSet
from validphys.tableloader import parse_exp_mat
from validphys.theorydbutils import fetch_theory
from validphys.utils import experiments_to_dataset_inputs

log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/plotoptions/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
import numpy as np
import pandas as pd

from nnpdf_data.utils import parse_yaml_inp
from reportengine.floatformatting import format_number
from reportengine.utils import ChainMap
from validphys.core import CommonDataSpec, DataSetSpec
from validphys.coredata import CommonData
from validphys.plotoptions.plottingoptions import PlottingOptions, default_labels, labeler_functions
from validphys.plotoptions.utils import apply_to_all_columns
from validphys.utils import parse_yaml_inp

log = logging.getLogger(__name__)

Expand Down
14 changes: 5 additions & 9 deletions validphys2/src/validphys/scripts/vp_checktheory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@
import pathlib
import sys

from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase
from reportengine import colors
from reportengine.table import savetable

from validphys.loader import FallbackLoader
from validphys.theorydbutils import TheoryNotFoundInDatabase
from validphys.theoryinfo import theory_info_table

log = logging.getLogger(__name__)
Expand All @@ -56,18 +55,15 @@

def main():
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
'theoryid',
nargs='?',
default=None,
help=(
"Numeric identifier of theory to look up info of"
),
type=int
help=("Numeric identifier of theory to look up info of"),
type=int,
)
group.add_argument(
'--fit',
Expand All @@ -76,7 +72,7 @@ def main():
"Name of a fit from which to parse `theoryid` from, instead of "
"supplying theoryid on command line"
),
default=None
default=None,
)
parser.add_argument(
'--dumptable',
Expand Down
5 changes: 2 additions & 3 deletions validphys2/src/validphys/tests/test_theorydbutils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pytest

from validphys.loader import Loader
from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory
from validphys.api import API
from validphys.theorydbutils import fetch_theory, TheoryNotFoundInDatabase, fetch_all

from validphys.loader import Loader

L = Loader()
DBPATH = L.theorydb_folder
Expand Down
6 changes: 2 additions & 4 deletions validphys2/src/validphys/theoryinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"""
from pandas import DataFrame

from nnpdf_data.theorydbutils import fetch_all, fetch_theory
from reportengine.table import table
from validphys.theorydbutils import fetch_all, fetch_theory


@table
Expand Down Expand Up @@ -60,8 +60,6 @@ def theory_info_table(theory_database, theory_db_id):
"""
res_dict = fetch_theory(theory_database, theory_db_id)
res_df = DataFrame(
list(res_dict.values()),
index=res_dict.keys(),
columns=[f'Info for theory {theory_db_id}'],
list(res_dict.values()), index=res_dict.keys(), columns=[f'Info for theory {theory_db_id}']
)
return res_df
51 changes: 0 additions & 51 deletions validphys2/src/validphys/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

from frozendict import frozendict
import numpy as np
from validobj import ValidationError, parse_input

from reportengine.compat import yaml


def make_hashable(obj: Any):
Expand Down Expand Up @@ -48,54 +45,6 @@ def generate_path_filtered_data(fit_path, setname):
return data_path, unc_path


def parse_yaml_inp(input_yaml, spec):
    """
    Helper function to parse yaml using the `validobj` library and print
    useful error messages in case of a parsing error.

    https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers

    Parameters
    ----------
    input_yaml: str or pathlib.Path
        Path to the yaml file to be parsed.
    spec: type
        Target class (e.g. a dataclass) that ``validobj`` validates the
        parsed mapping against.

    Returns
    -------
    An instance of ``spec`` built from the contents of the yaml file.

    Raises
    ------
    ValidationError
        If the yaml content does not conform to ``spec``. The message is
        annotated with the file name and the line number of every offending
        key or item.
    """
    input_yaml = pathlib.Path(input_yaml)
    # ``round_trip_load`` preserves line/column information (the ``lc``
    # attribute used below to point at the exact offending line).
    # Use a context manager so the file handle is always closed.
    with input_yaml.open("r", encoding="utf-8") as stream:
        inp = yaml.round_trip_load(stream)
    try:
        return parse_input(inp, spec)
    except ValidationError as e:
        current_exc = e
        current_inp = inp
        error_text_lines = []
        # Walk the exception chain, descending into the corresponding part of
        # the input at each step so the reported line numbers stay accurate.
        while current_exc:
            if hasattr(current_exc, 'wrong_field'):
                wrong_field = current_exc.wrong_field
                # Mappings coming from ``round_trip_load`` have an
                # ``lc`` attribute that gives a tuple of
                # ``(line_number, column)`` for a given item in
                # the mapping.
                line = current_inp.lc.item(wrong_field)[0]
                error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:")
                current_inp = current_inp[wrong_field]
            elif hasattr(current_exc, 'wrong_index'):
                wrong_index = current_exc.wrong_index
                # Similarly lists allow to retrieve the line number for
                # a given item.
                line = current_inp.lc.item(wrong_index)[0]
                current_inp = current_inp[wrong_index]
                error_text_lines.append(
                    f"Problem processing list item at line {line} in {input_yaml}:"
                )
            elif hasattr(current_exc, 'unknown'):
                # Report every unknown key, sorted by line number.
                unknown_lines = []
                for u in current_exc.unknown:
                    unknown_lines.append((current_inp.lc.item(u)[0], u))
                unknown_lines.sort()
                for line, key in unknown_lines:
                    error_text_lines.append(
                        f"Unknown key {key!r} defined at line {line} in {input_yaml}:"
                    )
            error_text_lines.append(str(current_exc))
            current_exc = current_exc.__cause__
        raise ValidationError('\n'.join(error_text_lines)) from e


@contextlib.contextmanager
def tempfile_cleaner(root, exit_func, exc, prefix=None, **kwargs):
"""A context manager to handle temporary directory creation and
Expand Down