From 76a8ec4dfa510f5ccc4d23366a169b73859ade18 Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Wed, 8 Apr 2026 15:28:58 +0300 Subject: [PATCH 1/5] Add LHAPDF parser --- src/ekobox/evol_pdf.py | 7 +- src/ekobox/genpdf/__init__.py | 62 +++++++++------ src/ekobox/genpdf/export.py | 114 ++++++---------------------- src/ekobox/genpdf/flavors.py | 31 +++++--- src/ekobox/genpdf/load.py | 43 ++--------- src/ekobox/genpdf/parser.py | 107 ++++++++++++++++++++++++++ tests/ekobox/test_evol_pdf.py | 10 +-- tests/ekobox/test_genpdf.py | 70 ++++++++--------- tests/ekobox/test_genpdf_export.py | 63 +++------------ tests/ekobox/test_genpdf_flavors.py | 75 ++++++++++-------- tests/ekobox/test_genpdf_load.py | 12 +-- tests/ekobox/test_genpdf_parser.py | 64 ++++++++++++++++ 12 files changed, 356 insertions(+), 302 deletions(-) create mode 100644 src/ekobox/genpdf/parser.py create mode 100644 tests/ekobox/test_genpdf_parser.py diff --git a/src/ekobox/evol_pdf.py b/src/ekobox/evol_pdf.py index 6de6db087..5f6ed5a88 100644 --- a/src/ekobox/evol_pdf.py +++ b/src/ekobox/evol_pdf.py @@ -9,6 +9,7 @@ from eko.runner import managed from . import apply, genpdf, info_file +from .genpdf.parser import LhapdfDataFile from .utils import regroup_evolgrid DEFAULT_NAME = "eko.tar" @@ -111,7 +112,9 @@ def evolve_pdfs( genpdf.install_pdf(name) -def collect_blocks(evolved_PDF: dict, q2block_per_nf: dict, xgrid: list): +def collect_blocks( + evolved_PDF: dict, q2block_per_nf: dict, xgrid: list +) -> LhapdfDataFile: """Collect all LHAPDF blocks for a given replica. Parameters @@ -139,4 +142,4 @@ def pdf_xq2(pid, x, Q2): pids=np.array(br.flavor_basis_pids), ) all_blocks.append(block) - return all_blocks + return LhapdfDataFile(header={}, blocks=all_blocks) diff --git a/src/ekobox/genpdf/__init__.py b/src/ekobox/genpdf/__init__.py index 5a8d3de17..a759d2379 100644 --- a/src/ekobox/genpdf/__init__.py +++ b/src/ekobox/genpdf/__init__.py @@ -12,6 +12,7 @@ from eko.io.types import EvolutionPoint as EPoint from . import export, flavors, load +from .parser import LhapdfDataBlock, LhapdfDataFile logger = logging.getLogger(__name__) @@ -21,7 +22,7 @@ def take_data( members: bool = False, xgrid: Optional[List[float]] = None, evolgrid: Optional[List[EPoint]] = None, -): +) -> tuple[dict, list[dict[str, str]], list[LhapdfDataFile]]: """Auxiliary function for `generate_pdf`. It provides the info, the heads of the member files and the blocks @@ -60,35 +61,46 @@ def take_data( else: toylh = toy.mkPDF("", 0) all_blocks.append( - [ - generate_block( - toylh.xfxQ2, xgrid, sorted_q2grid, list(br.flavor_basis_pids) - ) - ] + LhapdfDataFile( + header={"PdfType": "central"}, + blocks=[ + generate_block( + toylh.xfxQ2, + xgrid, + sorted_q2grid, + list(br.flavor_basis_pids), + ) + ], + ) ) else: info = load.load_info_from_file(parent_pdf_set) # iterate on members for m in range(int(info["NumMembers"])): - head, blocks = load.load_blocks_from_file(parent_pdf_set, m) - heads.append(head) - all_blocks.append(blocks) + f = load.load_blocks_from_file(parent_pdf_set, m) + heads.append(f.header) + all_blocks.append(f) if not members: break elif isinstance(parent_pdf_set, dict): info = copy.deepcopy(load.template_info) all_blocks.append( - [ - generate_block( - lambda pid, x, Q2: ( - 0.0 if pid not in parent_pdf_set else parent_pdf_set[pid](x, Q2) - ), - xgrid, - sorted_q2grid, - list(br.flavor_basis_pids), - ) - ] + LhapdfDataFile( + header={"PdfType": "central"}, + blocks=[ + generate_block( + lambda pid, x, Q2: ( + 0.0 + if pid not in parent_pdf_set + else parent_pdf_set[pid](x, Q2) + ), + xgrid, + sorted_q2grid, + list(br.flavor_basis_pids), + ) + ], + ) ) else: raise ValueError("Unknown parent pdf type") @@ -188,7 +200,7 @@ def generate_pdf( info["ForcePositive"] = 0 info["NumMembers"] = len(new_all_blocks) # exporting - export.dump_set(name, info, new_all_blocks, pdf_type_list=heads) + export.dump_set(name, info, new_all_blocks, heads) # install if install: @@ -220,12 +232,14 @@ def install_pdf(name): def generate_block( xfxQ2: Callable, xgrid: List[float], sorted_q2grid: List[float], pids: List[int] -) -> dict: +) -> LhapdfDataBlock: """Generate an LHAPDF data block from a callable.""" - block: dict = dict(mu2grid=sorted_q2grid, pids=pids, xgrid=xgrid) + qgrid = np.sqrt(sorted_q2grid) data = [] for x in xgrid: for mu2 in sorted_q2grid: data.append(np.array([xfxQ2(pid, x, mu2) for pid in pids])) - block["data"] = np.array(data) - return block + data = np.array(data) + return LhapdfDataBlock( + xgrid=np.array(xgrid), qgrid=qgrid, pids=np.array(pids), data=data + ) diff --git a/src/ekobox/genpdf/export.py b/src/ekobox/genpdf/export.py index 273046871..3e577d9d4 100644 --- a/src/ekobox/genpdf/export.py +++ b/src/ekobox/genpdf/export.py @@ -5,89 +5,9 @@ import re from typing import Optional -import numpy as np import yaml - -def list_to_str(ls, fmt="%.6e"): - """Convert a list of numbers to a string. - - Parameters - ---------- - ls : list(float) - list - fmt : str - format string - - Returns - ------- - str : - final string - """ - return " ".join([fmt % x for x in ls]) - - -def array_to_str(ar): - """Convert an array of numbers to a string. - - Parameters - ---------- - ar : list(list(float)) - array - - Returns - ------- - str : - final string - """ - table = "" - for line in ar: - table += f"{line[0]:.8e} " + list_to_str(line[1:], fmt="%.8e") + "\n" - return table - - -def dump_blocks( - name: str, member: int, blocks: list[dict], pdf_type: Optional[str] = None -) -> pathlib.Path: - """Write LHAPDF data file. - - Parameters - ---------- - name : str or os.PathLike - target name or path - member : int - PDF member - blocks : list(dict) - pdf blocks of data - pdf_type : str - PdfType to be copied in the head of member files - - Returns - ------- - pathlib.Path : target file - """ - path_name = pathlib.Path(name) - if path_name.suffix == ".dat": - target = path_name - else: - target = path_name / f"{path_name.stem}_{member:04d}.dat" - target.parent.mkdir(exist_ok=True) - with open(target, "w", encoding="utf-8") as o: - if pdf_type is None: - if member == 0: - o.write("PdfType: central\n") - else: - o.write("PdfType: replica\n") - else: - o.write(pdf_type) - o.write("Format: lhagrid1\n---\n") - for b in blocks: - o.write(list_to_str(b["xgrid"]) + "\n") - o.write(list_to_str(list(np.sqrt(b["mu2grid"]))) + "\n") - o.write(list_to_str(b["pids"], "%d") + "\n") - o.write(array_to_str(b["data"])) - o.write("---\n") - return target +from .parser import LhapdfDataFile def dump_info(name: str, info: dict) -> pathlib.Path: @@ -125,23 +45,35 @@ def dump_info(name: str, info: dict) -> pathlib.Path: return target -def dump_set(name, info, member_blocks, pdf_type_list=None): +def dump_set( + name: str, + info: dict, + member_files: list[LhapdfDataFile], + header_list: Optional[list[dict[str, str]]] = None, +) -> None: """Dump a whole set. Parameters ---------- - name : str + name : target name - info : dict + info : info dictionary - member_blocks : list(list(dict)) + member_files : blocks for all members - pdf_type : list(str) - list of strings to be copied in the head of member files + header_list : + list of optional additional headers to be copied in the head of member files """ dump_info(name, info) - for mem, blocks in enumerate(member_blocks): - if not isinstance(pdf_type_list, list) or len(pdf_type_list) == 0: - dump_blocks(name, mem, blocks) + for mem, f in enumerate(member_files): + # update header if necessary + if header_list is not None and len(header_list) > 0: + f.header.update(header_list[mem]) + # find path + path_name = pathlib.Path(name) + if path_name.suffix == ".dat": + target = path_name else: - dump_blocks(name, mem, blocks, pdf_type=pdf_type_list[mem]) + target = path_name / f"{path_name.stem}_{mem:04d}.dat" + target.parent.mkdir(exist_ok=True) + f.write(target) diff --git a/src/ekobox/genpdf/flavors.py b/src/ekobox/genpdf/flavors.py index d9ee5e6a7..ca269a855 100644 --- a/src/ekobox/genpdf/flavors.py +++ b/src/ekobox/genpdf/flavors.py @@ -6,6 +6,8 @@ from eko import basis_rotation as br +from .parser import LhapdfDataBlock, LhapdfDataFile + def pid_to_flavor(pids): """Create flavor representations from PIDs. @@ -50,26 +52,27 @@ def evol_to_flavor(labels): return np.array(ps) -def project(blocks, reprs): +def project(f: LhapdfDataFile, reprs) -> LhapdfDataFile: """Project some combination of flavors defined by reprs from the blocks. Parameters ---------- - blocks : list(dict) + blocks : PDF blocks - reprs : list(int) + reprs : active distributions in flavor representation Returns ------- - list(dict) : + LhapdfDataFile : filtered blocks """ - new_blocks = copy.deepcopy(blocks) - for block in new_blocks: - current_pids = block["pids"] - current_data = block["data"].T + new_blocks = [] + for block in f.blocks: + current_pids = block.pids + current_data = block.data.T if len(current_data) == 0: + new_blocks.append(copy.deepcopy(block)) continue # load all flavors flavor_data = [np.zeros_like(current_data[0]) for pid in br.flavor_basis_pids] @@ -81,9 +84,15 @@ def project(blocks, reprs): for elem in reprs: proj = elem[:, np.newaxis] * elem new_data += proj @ flavor_data / (elem @ elem) - block["pids"] = br.flavor_basis_pids - block["data"] = np.array(new_data).T - return new_blocks + new_blocks.append( + LhapdfDataBlock( + xgrid=block.xgrid, + qgrid=block.qgrid, + pids=br.flavor_basis_pids, + data=np.array(new_data).T, + ) + ) + return LhapdfDataFile(header=copy.deepcopy(f.header), blocks=new_blocks) def is_evolution_labels(labels): diff --git a/src/ekobox/genpdf/load.py b/src/ekobox/genpdf/load.py index fe3ab8860..2e973f1f1 100644 --- a/src/ekobox/genpdf/load.py +++ b/src/ekobox/genpdf/load.py @@ -2,9 +2,10 @@ import pathlib -import numpy as np import yaml +from .parser import LhapdfDataFile + here = pathlib.Path(__file__).parent # Expose the default template with open(here / "templatePDF.info", encoding="utf-8") as o: @@ -13,7 +14,7 @@ Toy_info = yaml.safe_load(t) -def load_info_from_file(pdfset_name): +def load_info_from_file(pdfset_name: str): """Load the info file from a parent pdf. Parameters @@ -34,7 +35,7 @@ def load_info_from_file(pdfset_name): return info -def load_blocks_from_file(pdfset_name, member): +def load_blocks_from_file(pdfset_name: str, member: int = 0) -> LhapdfDataFile: """Load a pdf from a parent pdf. Parameters @@ -46,38 +47,10 @@ def load_blocks_from_file(pdfset_name, member): Returns ------- - str : - head of member file - list(dict) : - pdf blocks of data + LhapdfDataFile: + data file """ import lhapdf # pylint: disable=import-error, import-outside-toplevel - src = pathlib.Path(lhapdf.paths()[0]) / pdfset_name - # read actual file - cnt = [] - with open(src / f"{pdfset_name}_{member:04d}.dat", encoding="utf-8") as o: - cnt = o.readlines() - # file head - head = cnt[0] - head_section = cnt.index("---\n") - blocks = [] - while head_section < len(cnt) - 1: - # section head - next_head_section = cnt.index("---\n", head_section + 1) - # determine participating pids - xgrid = np.array(cnt[head_section + 1].strip().split(" "), dtype=np.float64) - Qgrid = np.array(cnt[head_section + 2].strip().split(" "), dtype=np.float64) - pids = np.array(cnt[head_section + 3].strip().split(" "), dtype=np.int_) - # data - data = [] - for line in cnt[head_section + 4 : next_head_section]: - elems = np.fromstring(line.strip(), sep=" ") - data.append(elems) - mu2grid = [el * el for el in Qgrid] - blocks.append( - dict(xgrid=xgrid, mu2grid=mu2grid, pids=pids, data=np.array(data)) - ) - # iterate - head_section = next_head_section - return head, blocks + src = pathlib.Path(lhapdf.paths()[0]) + return LhapdfDataFile.read_with_set(src, pdfset_name, member) diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py new file mode 100644 index 000000000..202f2e3d5 --- /dev/null +++ b/src/ekobox/genpdf/parser.py @@ -0,0 +1,107 @@ +"""LHAPDF data parser.""" + +import pathlib +from dataclasses import dataclass +from io import StringIO +from typing import Self, Type + +import numpy as np +import numpy.typing as npt + + +@dataclass(frozen=True) +class LhapdfDataBlock: + """LHAPDF data block.""" + + xgrid: npt.NDArray[np.float64] + """X grid.""" + + qgrid: npt.NDArray[np.float64] + """Q grid.""" + + pids: npt.NDArray[np.int_] + """|PID| grid.""" + + data: npt.NDArray[np.float64] + """Tabulated data.""" + + def is_valid(self: Self) -> bool: + """Check if dimensions are reasonable.""" + for a in [self.xgrid, self.qgrid, self.pids]: + if len(a.shape) != 1 or len(a) <= 0: + return False + return self.data.shape == (len(self.xgrid) * len(self.qgrid), len(self.pids)) + + +class LhapdfDataFile: + """LHAPDF data file.""" + + header: dict[str, str] + """Header information.""" + + blocks: list[LhapdfDataBlock] + """Data blocks.""" + + def __init__( + self: Self, header: dict[str, str], blocks: list[LhapdfDataBlock] + ) -> None: + self.header = header + self.blocks = blocks + + @classmethod + def read(cls: Type[Self], path: pathlib.Path) -> Self: + """Read from file.""" + cnt = path.read_text().split("---\n") + # header + header = {} + for line in cnt[0].splitlines(): + k, v = line.split(":", 1) + header[k] = v.strip() + # blocks + blocks = [] + for b in cnt[1:]: + # skip last if necessary + if len(b.strip()) == 0: + continue + # subdivide + lines = b.splitlines() + xgrid = np.fromstring(lines[0], np.float64, sep=" ") + qgrid = np.fromstring(lines[1], np.float64, sep=" ") + pids = np.fromstring(lines[2], np.int_, sep=" ") + data = np.loadtxt(StringIO("\n".join(lines[3:]))) + blocks.append( + LhapdfDataBlock(xgrid=xgrid, qgrid=qgrid, pids=pids, data=data) + ) + return cls(header=header, blocks=blocks) + + def write(self: Self, path: pathlib.Path) -> int: + """Write to file.""" + cnt = "" + # header + for k, v in self.header.items(): + cnt += f"{k}: {v}\n" + if "Format" not in self.header: + cnt += "Format: lhagrid1\n" + cnt += "---\n" + # blocks + blocks = [] + for b in self.blocks: + bcnt = StringIO() + np.savetxt(bcnt, b.xgrid, "%.8e", newline=" ") + bcnt.write("\n") + np.savetxt(bcnt, b.qgrid, "%.8e", newline=" ") + bcnt.write("\n") + np.savetxt(bcnt, b.pids, "%d", newline=" ") + bcnt.write("\n") + np.savetxt(bcnt, b.data, "%.8e") + blocks.append(bcnt.getvalue()) + cnt += "---\n".join(blocks) + cnt += "---\n" + return path.write_text(cnt) + + @classmethod + def read_with_set( + cls: Type[Self], path: pathlib.Path, pdfset: str, member: int = 0 + ) -> Self: + """Read given `member` from given `pdfset` inside `path`.""" + return cls.read(path / pdfset / f"{pdfset}_{member:04d}.dat") diff --git a/tests/ekobox/test_evol_pdf.py b/tests/ekobox/test_evol_pdf.py index baf70726d..a55c1b4bd 100644 --- a/tests/ekobox/test_evol_pdf.py +++ b/tests/ekobox/test_evol_pdf.py @@ -119,11 +119,11 @@ def mk(eps): # basic eps = [(3.0, 3), (4.0, 3)] bs = ev_p.collect_blocks(mk(eps), ev_p.regroup_evolgrid(eps), xgrid) - assert len(bs) == 1 - np.testing.assert_allclose(bs[0]["mu2grid"], (3.0, 4.0)) + assert len(bs.blocks) == 1 + np.testing.assert_allclose(bs.blocks[0].qgrid ** 2.0, (3.0, 4.0)) # more advanced eps = [(4.0, 3), (3.0, 3), (5.0, 4), (3.0, 4)] bs = ev_p.collect_blocks(mk(eps), ev_p.regroup_evolgrid(eps), xgrid) - assert len(bs) == 2 - np.testing.assert_allclose(bs[0]["mu2grid"], (3.0, 4.0)) - np.testing.assert_allclose(bs[1]["mu2grid"], (3.0, 5.0)) + assert len(bs.blocks) == 2 + np.testing.assert_allclose(bs.blocks[0].qgrid ** 2.0, (3.0, 4.0)) + np.testing.assert_allclose(bs.blocks[1].qgrid ** 2.0, (3.0, 5.0)) diff --git a/tests/ekobox/test_genpdf.py b/tests/ekobox/test_genpdf.py index a379b2931..87fdac8f4 100644 --- a/tests/ekobox/test_genpdf.py +++ b/tests/ekobox/test_genpdf.py @@ -28,8 +28,6 @@ def test_genpdf_exceptions(tmp_path, cd): 10, ) # non-existant PDF set - with pytest.raises(FileNotFoundError): - genpdf.install_pdf("foo") with pytest.raises(TypeError): genpdf.generate_pdf("debug", [21], info_update=(10, 15, 20)) @@ -39,11 +37,9 @@ def test_generate_block(): mu2s = np.geomspace(1.0, 1e3, 5).tolist() pids = np.arange(3) b = genpdf.generate_block(lambda pid, x, q2: pid * x * q2, xg, mu2s, pids) - assert isinstance(b, dict) - assert sorted(b.keys()) == sorted(["data", "mu2grid", "xgrid", "pids"]) - assert isinstance(b["data"], np.ndarray) - assert b["data"].shape == (len(xg) * len(mu2s), len(pids)) - assert b["mu2grid"] == sorted(b["mu2grid"]) + assert isinstance(b.data, np.ndarray) + assert b.data.shape == (len(xg) * len(mu2s), len(pids)) + np.testing.assert_allclose(b.qgrid, sorted(b.qgrid)) def test_install_pdf(fake_lhapdf, cd): @@ -100,19 +96,16 @@ def test_generate_pdf_debug_pid(fake_lhapdf, cd): ppm = pp / f"{n}_0000.dat" assert ppm.exists() assert "PdfType: central" in ppm.read_text() - head, blocks = genpdf.load.load_blocks_from_file(n, 0) - assert "PdfType: central" in head - assert len(blocks) == 1 - b = blocks[0] - assert 21 in b["pids"] - for k, line in enumerate(b["data"]): - for pid, f in zip(b["pids"], line): + f = genpdf.load.load_blocks_from_file(n, 0) + assert "PdfType" in f.header + assert f.header["PdfType"] == "central" + assert len(f.blocks) == 1 + b = f.blocks[0] + assert 21 in b.pids + for k, line in enumerate(b.data): + for pid, f in zip(b.pids, line): # the gluon is non-zero in the bulk - x=0 is included here - if ( - pid == 21 - and k > len(b["mu2grid"]) - 1 - and k < len(b["data"]) - len(b["mu2grid"]) - ): + if pid == 21 and k > len(b.qgrid) - 1 and k < len(b.data) - len(b.qgrid): assert not f == 0.0 else: assert f == 0.0 @@ -143,23 +136,25 @@ def test_generate_pdf_pdf_evol(fake_lhapdf, fake_nn31, fake_mstw, fake_ct14, cd) for m in range(nmem + 1): pm = p / f"{n}_{m:04d}.dat" assert pm.exists() - head, blocks = genpdf.load.load_blocks_from_file(n, m) - assert ("PdfType: central" if m == 0 else f"PdfType: {err_type}") in head - assert len(blocks) == nb - for b in blocks: - for k, line in enumerate(b["data"]): - for pid, f in zip(b["pids"], line): + f = genpdf.load.load_blocks_from_file(n, m) + assert "PdfType" in f.header + if m == 0: + assert f.header["PdfType"] == "central" + else: + assert f.header["PdfType"] == err_type + assert len(f.blocks) == nb + for b in f.blocks: + for k, line in enumerate(b.data): + for pid, f in zip(b.pids, line): # the singlet is non-zero in the bulk - x >= 0 - if abs(pid) in range(1, 6 + 1) and k < len(b["data"]) - len( - b["mu2grid"] + if abs(pid) in range(1, 6 + 1) and k < len(b.data) - len( + b.qgrid ): assert not f == 0.0 assert not np.isclose(f, 0.0, atol=1e-15) else: # MSTW is not 0 at the end, but only almost - if err_type == "error" and k >= len(b["data"]) - len( - b["mu2grid"] - ): + if err_type == "error" and k >= len(b.data) - len(b.qgrid): np.testing.assert_allclose(f, 0.0, atol=1e-15) else: assert f == 0.0 @@ -194,14 +189,15 @@ def test_generate_pdf_toy_antiqed(fake_lhapdf, cd): pm = p / f"{n}_0000.dat" assert pm.exists() assert "PdfType: central" in pm.read_text() - head, blocks = genpdf.load.load_blocks_from_file(n, 0) - assert "PdfType: central" in head - assert len(blocks) == 1 - b = blocks[0] - for k, line in enumerate(b["data"]): - for pid, f in zip(b["pids"], line): + f = genpdf.load.load_blocks_from_file(n, 0) + assert "PdfType" in f.header + assert f.header["PdfType"] == "central" + assert len(f.blocks) == 1 + b = f.blocks[0] + for k, line in enumerate(b.data): + for pid, f in zip(b.pids, line): # the u and d are non-zero in the bulk - x=0 is not included here - if abs(pid) in [1, 2] and k < len(b["data"]) - len(b["mu2grid"]): + if abs(pid) in [1, 2] and k < len(b.data) - len(b.qgrid): assert not f == 0.0 else: assert f == 0.0 diff --git a/tests/ekobox/test_genpdf_export.py b/tests/ekobox/test_genpdf_export.py index c87df43b3..e3abf5835 100644 --- a/tests/ekobox/test_genpdf_export.py +++ b/tests/ekobox/test_genpdf_export.py @@ -2,24 +2,7 @@ import yaml from ekobox import genpdf - - -def test_list_to_str(): - a = genpdf.export.list_to_str([1, 2]) - assert isinstance(a, str) - assert "1." in a - b = genpdf.export.list_to_str([1.0, 2.0]) - assert isinstance(b, str) - assert "1." in a - - -def test_array_to_str(): - s = (2, 2) - a = genpdf.export.array_to_str(np.arange(4).reshape(s)) - assert isinstance(a, str) - assert "1." in a - b = np.array([e.split() for e in a.splitlines()]) - assert b.shape == s +from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile def test_dump_info(tmp_path): @@ -50,42 +33,14 @@ def fake_blocks(n_blocks, n_x, n_q2, n_pids): bs = [] for _ in range(n_blocks): bs.append( - { - "xgrid": np.linspace(0.0, 1.0, n_x), - "mu2grid": np.geomspace(1.0, 1e3, n_q2), - "pids": np.arange(n_pids), - "data": np.random.rand(n_x * n_q2, n_pids), - } + LhapdfDataBlock( + xgrid=np.linspace(0.0, 1.0, n_x), + qgrid=np.geomspace(1.0, 1e3, n_q2), + pids=np.arange(n_pids), + data=np.random.rand(n_x * n_q2, n_pids), + ) ) - return bs - - -def test_dump_blocks(tmp_path): - n = "test" - p = tmp_path / n - nb = 2 - for m in range(3): - f = p / f"{n}_{m:04d}.dat" - is_my_type = m > 1 - pdf_type = "Bla: blub" if is_my_type else None - g = genpdf.export.dump_blocks(p, m, fake_blocks(nb, 2, 2, 2), pdf_type=pdf_type) - assert p.exists() - assert f.exists() - assert f == g - cnt = f.read_text() - if is_my_type: - assert "Bla: blub" in cnt - else: - assert ("central" in cnt) == (m == 0) - assert "Format" in cnt - assert cnt.count("---") == nb + 1 - - -def test_dump_blocks_to_file(tmp_path): - f = tmp_path / "mem.dat" - g = genpdf.export.dump_blocks(f, 0, fake_blocks(2, 2, 2, 2)) - assert f.exists() - assert f == g + return LhapdfDataFile(header={}, blocks=bs) def test_dump_set(tmp_path): @@ -93,7 +48,7 @@ def test_dump_set(tmp_path): p = tmp_path / n i = {"a": "b", "c": 2} nmem = 2 - for pdf_type_list in (None, ["Bla: Blub"] * nmem): + for pdf_type_list in (None, [{"Bla": "Blub"}] * nmem): genpdf.export.dump_set( p, i, [fake_blocks(2, 2, 2, 2) for _ in range(nmem)], pdf_type_list ) diff --git a/tests/ekobox/test_genpdf_flavors.py b/tests/ekobox/test_genpdf_flavors.py index e1f3d695e..3c3cea0d5 100644 --- a/tests/ekobox/test_genpdf_flavors.py +++ b/tests/ekobox/test_genpdf_flavors.py @@ -1,6 +1,7 @@ import numpy as np from ekobox import genpdf +from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile def test_is_evolution(): @@ -34,53 +35,61 @@ def test_flavors_evol_to_flavor(): def test_flavors_evol_raw(): - blocks = [ - { - "mu2grid": np.array([1, 2]), - "xgrid": np.array([0.1, 1.0]), - "pids": np.array([-1, 21, 1]), - "data": np.array([[0.1, 0.2, 0.1]] * 4), - } - ] - gonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["g"])) - assert len(gonly) == 1 + f = LhapdfDataFile( + header={}, + blocks=[ + LhapdfDataBlock( + xgrid=np.array([0.1, 1.0]), + qgrid=np.array([1, 2]), + pids=np.array([-1, 21, 1]), + data=np.array([[0.1, 0.2, 0.1]] * 4), + ) + ], + ) + gonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["g"])) + assert len(gonly.blocks) == 1 np.testing.assert_allclose( - gonly[0]["data"], + gonly.blocks[0].data, np.array( [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 4 ), ) - Sonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["S"])) - assert len(Sonly) == 1 + Sonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["S"])) + assert len(Sonly.blocks) == 1 for i in [0, 1, 2, 3]: # g and gamma are zero - np.testing.assert_allclose(Sonly[0]["data"][i][7], 0) - np.testing.assert_allclose(Sonly[0]["data"][i][0], 0) + np.testing.assert_allclose(Sonly.blocks[0].data[i][7], 0) + np.testing.assert_allclose(Sonly.blocks[0].data[i][0], 0) # quark are all equal and equal to anti-quarks for pid in [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13]: - np.testing.assert_allclose(Sonly[0]["data"][i][pid], Sonly[0]["data"][i][1]) + np.testing.assert_allclose( + Sonly.blocks[0].data[i][pid], Sonly.blocks[0].data[i][1] + ) def test_flavors_evol_nodata(): # try with a block without data - blocks = [ - { - "mu2grid": np.array([1, 2]), - "xgrid": np.array([0.1, 1.0]), - "pids": np.array([-1, 21, 1]), - "data": np.array([]), - }, - { - "mu2grid": np.array([1, 2]), - "xgrid": np.array([0.1, 1.0]), - "pids": np.array([-1, 21, 1]), - "data": np.array([[0.1, 0.2, 0.1]] * 4), - }, - ] - gonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["g"])) - assert len(gonly) == 2 + f = LhapdfDataFile( + header={}, + blocks=[ + LhapdfDataBlock( + xgrid=np.array([0.1, 1.0]), + qgrid=np.array([1, 2]), + pids=np.array([-1, 21, 1]), + data=np.array([]), + ), + LhapdfDataBlock( + xgrid=np.array([0.1, 1.0]), + qgrid=np.array([1, 2]), + pids=np.array([-1, 21, 1]), + data=np.array([[0.1, 0.2, 0.1]] * 4), + ), + ], + ) + gonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["g"])) + assert len(gonly.blocks) == 2 np.testing.assert_allclose( - gonly[1]["data"], + gonly.blocks[1].data, np.array( [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 4 ), diff --git a/tests/ekobox/test_genpdf_load.py b/tests/ekobox/test_genpdf_load.py index aab51cc17..4e648bf87 100644 --- a/tests/ekobox/test_genpdf_load.py +++ b/tests/ekobox/test_genpdf_load.py @@ -1,5 +1,3 @@ -import numpy as np - from ekobox import genpdf @@ -11,11 +9,5 @@ def test_load_info(fake_ct14): def test_load_data_ct14(fake_ct14): - blocks = genpdf.load.load_blocks_from_file(fake_ct14, 0)[1] - assert len(blocks) == 1 - b0 = blocks[0] - assert isinstance(b0, dict) - assert sorted(b0.keys()) == sorted(["pids", "xgrid", "mu2grid", "data"]) - assert sorted(b0["pids"]) == sorted([-3, -2, -1, 21, 1, 2, 3]) - assert len(b0["data"].T) == 7 - np.testing.assert_allclose(b0["xgrid"][0], 1e-9) + f = genpdf.load.load_blocks_from_file(fake_ct14, 0) + assert len(f.blocks) == 1 diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py new file mode 100644 index 000000000..76968026f --- /dev/null +++ b/tests/ekobox/test_genpdf_parser.py @@ -0,0 +1,64 @@ +import numpy as np + +from ekobox.genpdf.parser import LhapdfDataFile + + +def test_genpdf_parser_ct14(fake_lhapdf, fake_ct14, tmp_path): + # read + f = LhapdfDataFile.read_with_set(fake_lhapdf, fake_ct14) + assert "PdfType" in f.header + assert f.header["PdfType"] == "central" + assert len(f.blocks) == 1 + b0 = f.blocks[0] + assert b0.is_valid() + assert sorted(b0.pids) == sorted([-3, -2, -1, 21, 1, 2, 3]) + assert len(b0.data.T) == 7 + np.testing.assert_allclose(b0.xgrid[0], 1e-9) + # write + t = tmp_path / "blub.dat" + f.write(t) + # read back + g = LhapdfDataFile.read(t) + assert g.header == f.header + assert len(g.blocks) == len(f.blocks) + c0 = g.blocks[0] + np.testing.assert_allclose(c0.xgrid, b0.xgrid) + np.testing.assert_allclose(c0.qgrid, b0.qgrid) + np.testing.assert_allclose(c0.pids, b0.pids) + np.testing.assert_allclose(c0.data, b0.data) + + +def test_genpdf_parser_mstw(fake_lhapdf, fake_mstw): + # read + f0 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_mstw, 0) + assert "PdfType" in f0.header + assert f0.header["PdfType"] == "central" + assert len(f0.blocks) == 3 + for b in f0.blocks: + assert b.is_valid() + b0 = f0.blocks[0] + assert sorted(b0.pids) == sorted([-5, -4, -3, -2, -1, 21, 1, 2, 3, 4, 5]) + assert len(b0.data.T) == 11 + np.testing.assert_allclose(b0.xgrid[0], 1e-6) + # check also first member + f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_mstw, 1) + assert "PdfType" in f1.header + assert f1.header["PdfType"] == "error" + + +def test_genpdf_parser_nn31(fake_lhapdf, fake_nn31): + # read + f0 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 0) + assert "PdfType" in f0.header + assert f0.header["PdfType"] == "central" + assert len(f0.blocks) == 2 + for b in f0.blocks: + assert b.is_valid() + b0 = f0.blocks[0] + assert sorted(b0.pids) == sorted([-5, -4, -3, -2, -1, 21, 1, 2, 3, 4, 5]) + assert len(b0.data.T) == 11 + np.testing.assert_allclose(b0.xgrid[0], 1e-9) + # check also first member + f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 1) + assert "PdfType" in f1.header + assert f1.header["PdfType"] == "replica" From ecabc2b725f470a910f20c86ca843d7daca67467 Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Wed, 8 Apr 2026 15:40:59 +0300 Subject: [PATCH 2/5] Drop Self type due to py3.10 --- src/ekobox/genpdf/parser.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py index 202f2e3d5..efca09007 100644 --- a/src/ekobox/genpdf/parser.py +++ b/src/ekobox/genpdf/parser.py @@ -3,7 +3,6 @@ import pathlib from dataclasses import dataclass from io import StringIO -from typing import Self, Type import numpy as np import numpy.typing as npt @@ -25,7 +24,7 @@ class LhapdfDataBlock: data: npt.NDArray[np.float64] """Tabulated data.""" - def is_valid(self: Self) -> bool: + def is_valid(self) -> bool: """Check if dimensions are reasonable.""" for a in [self.xgrid, self.qgrid, self.pids]: if len(a.shape) != 1 or len(a) <= 0: @@ -42,14 +41,12 @@ class LhapdfDataFile: blocks: list[LhapdfDataBlock] """Data blocks.""" - def __init__( - self: Self, header: dict[str, str], blocks: list[LhapdfDataBlock] - ) -> None: + def __init__(self, header: dict[str, str], blocks: list[LhapdfDataBlock]) -> None: self.header = header self.blocks = blocks @classmethod - def read(cls: Type[Self], path: pathlib.Path) -> Self: + def read(cls, path: pathlib.Path): """Read from file.""" cnt = path.read_text().split("---\n") # header @@ -74,7 +71,7 @@ def read(cls: Type[Self], path: pathlib.Path) -> Self: ) return cls(header=header, blocks=blocks) - def write(self: Self, path: pathlib.Path) -> int: + def write(self, path: pathlib.Path) -> int: """Write to file.""" cnt = "" # header @@ -100,8 +97,6 @@ def write(self: Self, path: pathlib.Path) -> int: return path.write_text(cnt) @classmethod - def read_with_set( - cls: Type[Self], path: pathlib.Path, pdfset: str, member: int = 0 - ) -> Self: + def read_with_set(cls, path: pathlib.Path, pdfset: str, member: int = 0): """Read given `member` from given `pdfset` inside `path`.""" return cls.read(path / pdfset / f"{pdfset}_{member:04d}.dat") From 307fcfe8ffac61182cecede060358f76e768fd20 Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Wed, 8 Apr 2026 15:45:52 +0300 Subject: [PATCH 3/5] Add Changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb4e3aab7..fe04e96ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ Items without prefix refer to a global change. ## [Unreleased](https://github.com/NNPDF/eko/compare/v0.15.3...HEAD) +### Changed +- py: Add LHAPDF parser to `genpdf` (soft breaking change) ([#506](https://github.com/NNPDF/eko/pull/506)) + ## [0.15.3](https://github.com/NNPDF/eko/compare/v0.15.2...v0.15.3) - 2026-02-05 ### Added From 880d5fca252376847e6f68959309e167f4e4ef22 Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Fri, 10 Apr 2026 17:11:54 +0300 Subject: [PATCH 4/5] Add LhapdfBlock.add --- src/ekobox/genpdf/parser.py | 22 ++++++++++++ tests/ekobox/test_genpdf_parser.py | 55 +++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py index efca09007..44706d58a 100644 --- a/src/ekobox/genpdf/parser.py +++ b/src/ekobox/genpdf/parser.py @@ -31,6 +31,28 @@ def is_valid(self) -> bool: return False return self.data.shape == (len(self.xgrid) * len(self.qgrid), len(self.pids)) + def add(self, other): + """Add other block.""" + # x and q have to be the same + np.testing.assert_allclose(self.xgrid, other.xgrid) + np.testing.assert_allclose(self.qgrid, other.qgrid) + # PID we can recover + tot_pids = np.unique(np.concatenate([self.pids, other.pids])) + tot_data = [] + for pid in tot_pids: + if pid in self.pids and pid in other.pids: + tot_data.append( + self.data[:, self.pids.searchsorted(pid)].copy() + + other.data[:, other.pids.searchsorted(pid)].copy() + ) + elif pid in self.pids: + tot_data.append(self.data[:, self.pids.searchsorted(pid)].copy()) + elif pid in other.pids: + tot_data.append(other.data[:, other.pids.searchsorted(pid)].copy()) + return LhapdfDataBlock( + xgrid=self.xgrid, qgrid=self.qgrid, pids=tot_pids, data=np.array(tot_data).T + ) + class LhapdfDataFile: """LHAPDF data file.""" diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py index 76968026f..6caf64057 100644 --- a/tests/ekobox/test_genpdf_parser.py +++ b/tests/ekobox/test_genpdf_parser.py @@ -1,6 +1,6 @@ import numpy as np -from ekobox.genpdf.parser import LhapdfDataFile +from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile def test_genpdf_parser_ct14(fake_lhapdf, fake_ct14, tmp_path): @@ -62,3 +62,56 @@ def test_genpdf_parser_nn31(fake_lhapdf, fake_nn31): f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 1) assert "PdfType" in f1.header assert f1.header["PdfType"] == "replica" + + +def test_genpdf_parser_block_add(): + a = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([1]), + data=np.array([[1.0], [-1.0]]), + ) + b = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([1]), + data=np.array([[2.0], [1.0]]), + ) + c = a.add(b) + np.testing.assert_allclose(c.xgrid, a.xgrid) + np.testing.assert_allclose(c.qgrid, a.qgrid) + np.testing.assert_allclose(c.pids, a.pids) + np.testing.assert_allclose(c.data, np.array([[3.0], [0.0]])) + b = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([2]), + data=np.array([[2.0], [1.0]]), + ) + c = a.add(b) + np.testing.assert_allclose(c.xgrid, a.xgrid) + np.testing.assert_allclose(c.qgrid, a.qgrid) + np.testing.assert_allclose(c.pids, np.array([1, 2])) + np.testing.assert_allclose(c.data, np.array([[1.0, 2.0], [-1.0, 1.0]])) + b = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([2]), + data=np.array([[2.0], [1.0]]), + ) + c = a.add(b) + np.testing.assert_allclose(c.xgrid, a.xgrid) + np.testing.assert_allclose(c.qgrid, a.qgrid) + np.testing.assert_allclose(c.pids, np.array([1, 2])) + np.testing.assert_allclose(c.data, np.array([[1.0, 2.0], [-1.0, 1.0]])) + b = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([1, 2]), + data=np.array([[3.0, 2.0], [4.0, 1.0]]), + ) + c = a.add(b) + np.testing.assert_allclose(c.xgrid, a.xgrid) + np.testing.assert_allclose(c.qgrid, a.qgrid) + np.testing.assert_allclose(c.pids, np.array([1, 2])) + np.testing.assert_allclose(c.data, np.array([[4.0, 2.0], [3.0, 1.0]])) From e3bc71d9dda320a57cea3c035a997c8fad7f0070 Mon Sep 17 00:00:00 2001 From: Felix Hekhorn Date: Mon, 13 Apr 2026 11:23:37 +0300 Subject: [PATCH 5/5] Fix LhapdfBlock.add --- src/ekobox/genpdf/parser.py | 8 ++++---- tests/ekobox/test_genpdf_parser.py | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py index 44706d58a..8d8d6b20c 100644 --- a/src/ekobox/genpdf/parser.py +++ b/src/ekobox/genpdf/parser.py @@ -42,13 +42,13 @@ def add(self, other): for pid in tot_pids: if pid in self.pids and pid in other.pids: tot_data.append( - self.data[:, self.pids.searchsorted(pid)].copy() - + other.data[:, other.pids.searchsorted(pid)].copy() + self.data[:, self.pids.tolist().index(pid)].copy() + + other.data[:, other.pids.tolist().index(pid)].copy() ) elif pid in self.pids: - tot_data.append(self.data[:, self.pids.searchsorted(pid)].copy()) + tot_data.append(self.data[:, self.pids.tolist().index(pid)].copy()) elif pid in other.pids: - tot_data.append(other.data[:, other.pids.searchsorted(pid)].copy()) + tot_data.append(other.data[:, other.pids.tolist().index(pid)].copy()) return LhapdfDataBlock( xgrid=self.xgrid, qgrid=self.qgrid, pids=tot_pids, data=np.array(tot_data).T ) diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py index 6caf64057..05aa2887e 100644 --- a/tests/ekobox/test_genpdf_parser.py +++ b/tests/ekobox/test_genpdf_parser.py @@ -115,3 +115,20 @@ def test_genpdf_parser_block_add(): np.testing.assert_allclose(c.qgrid, a.qgrid) np.testing.assert_allclose(c.pids, np.array([1, 2])) np.testing.assert_allclose(c.data, np.array([[4.0, 2.0], [3.0, 1.0]])) + a = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([1, -1]), + data=np.array([[1.0, 2.0], [3.0, 4.0]]), + ) + b = LhapdfDataBlock( + xgrid=np.array([0.5]), + qgrid=np.array([10.0]), + pids=np.array([-1, 1]), + data=np.array([[20.0, 10.0], [40.0, 30.0]]), + ) + c = a.add(b) + np.testing.assert_allclose(c.xgrid, a.xgrid) + np.testing.assert_allclose(c.qgrid, a.qgrid) + np.testing.assert_allclose(c.pids, np.array([-1, 1])) + np.testing.assert_allclose(c.data, np.array([[22.0, 11.0], [44.0, 33.0]]))