From 76a8ec4dfa510f5ccc4d23366a169b73859ade18 Mon Sep 17 00:00:00 2001
From: Felix Hekhorn <felix.a.hekhorn@jyu.fi>
Date: Wed, 8 Apr 2026 15:28:58 +0300
Subject: [PATCH 1/5] Add LHAPDF parser

---
 src/ekobox/evol_pdf.py              |   7 +-
 src/ekobox/genpdf/__init__.py       |  62 +++++++++------
 src/ekobox/genpdf/export.py         | 114 ++++++----------------------
 src/ekobox/genpdf/flavors.py        |  31 +++++---
 src/ekobox/genpdf/load.py           |  43 ++---------
 src/ekobox/genpdf/parser.py         | 107 ++++++++++++++++++++++++++
 tests/ekobox/test_evol_pdf.py       |  10 +--
 tests/ekobox/test_genpdf.py         |  70 ++++++++---------
 tests/ekobox/test_genpdf_export.py  |  63 +++------------
 tests/ekobox/test_genpdf_flavors.py |  75 ++++++++++--------
 tests/ekobox/test_genpdf_load.py    |  12 +--
 tests/ekobox/test_genpdf_parser.py  |  64 ++++++++++++++++
 12 files changed, 356 insertions(+), 302 deletions(-)
 create mode 100644 src/ekobox/genpdf/parser.py
 create mode 100644 tests/ekobox/test_genpdf_parser.py

diff --git a/src/ekobox/evol_pdf.py b/src/ekobox/evol_pdf.py
index 6de6db087..5f6ed5a88 100644
--- a/src/ekobox/evol_pdf.py
+++ b/src/ekobox/evol_pdf.py
@@ -9,6 +9,7 @@
 from eko.runner import managed
 
 from . import apply, genpdf, info_file
+from .genpdf.parser import LhapdfDataFile
 from .utils import regroup_evolgrid
 
 DEFAULT_NAME = "eko.tar"
@@ -111,7 +112,9 @@ def evolve_pdfs(
         genpdf.install_pdf(name)
 
 
-def collect_blocks(evolved_PDF: dict, q2block_per_nf: dict, xgrid: list):
+def collect_blocks(
+    evolved_PDF: dict, q2block_per_nf: dict, xgrid: list
+) -> LhapdfDataFile:
     """Collect all LHAPDF blocks for a given replica.
 
     Parameters
@@ -139,4 +142,4 @@ def pdf_xq2(pid, x, Q2):
             pids=np.array(br.flavor_basis_pids),
         )
         all_blocks.append(block)
-    return all_blocks
+    return LhapdfDataFile(header={}, blocks=all_blocks)
diff --git a/src/ekobox/genpdf/__init__.py b/src/ekobox/genpdf/__init__.py
index 5a8d3de17..a759d2379 100644
--- a/src/ekobox/genpdf/__init__.py
+++ b/src/ekobox/genpdf/__init__.py
@@ -12,6 +12,7 @@
 from eko.io.types import EvolutionPoint as EPoint
 
 from . import export, flavors, load
+from .parser import LhapdfDataBlock, LhapdfDataFile
 
 logger = logging.getLogger(__name__)
 
@@ -21,7 +22,7 @@ def take_data(
     members: bool = False,
     xgrid: Optional[List[float]] = None,
     evolgrid: Optional[List[EPoint]] = None,
-):
+) -> tuple[dict, list[dict[str, str]], list[LhapdfDataFile]]:
     """Auxiliary function for `generate_pdf`.
 
     It provides the info, the heads of the member files and the blocks
@@ -60,35 +61,46 @@ def take_data(
             else:
                 toylh = toy.mkPDF("", 0)
             all_blocks.append(
-                [
-                    generate_block(
-                        toylh.xfxQ2, xgrid, sorted_q2grid, list(br.flavor_basis_pids)
-                    )
-                ]
+                LhapdfDataFile(
+                    header={"PdfType": "central"},
+                    blocks=[
+                        generate_block(
+                            toylh.xfxQ2,
+                            xgrid,
+                            sorted_q2grid,
+                            list(br.flavor_basis_pids),
+                        )
+                    ],
+                )
             )
 
         else:
             info = load.load_info_from_file(parent_pdf_set)
             # iterate on members
             for m in range(int(info["NumMembers"])):
-                head, blocks = load.load_blocks_from_file(parent_pdf_set, m)
-                heads.append(head)
-                all_blocks.append(blocks)
+                f = load.load_blocks_from_file(parent_pdf_set, m)
+                heads.append(f.header)
+                all_blocks.append(f)
                 if not members:
                     break
     elif isinstance(parent_pdf_set, dict):
         info = copy.deepcopy(load.template_info)
         all_blocks.append(
-            [
-                generate_block(
-                    lambda pid, x, Q2: (
-                        0.0 if pid not in parent_pdf_set else parent_pdf_set[pid](x, Q2)
-                    ),
-                    xgrid,
-                    sorted_q2grid,
-                    list(br.flavor_basis_pids),
-                )
-            ]
+            LhapdfDataFile(
+                header={"PdfType": "central"},
+                blocks=[
+                    generate_block(
+                        lambda pid, x, Q2: (
+                            0.0
+                            if pid not in parent_pdf_set
+                            else parent_pdf_set[pid](x, Q2)
+                        ),
+                        xgrid,
+                        sorted_q2grid,
+                        list(br.flavor_basis_pids),
+                    )
+                ],
+            )
         )
     else:
         raise ValueError("Unknown parent pdf type")
@@ -188,7 +200,7 @@ def generate_pdf(
         info["ForcePositive"] = 0
     info["NumMembers"] = len(new_all_blocks)
     # exporting
-    export.dump_set(name, info, new_all_blocks, pdf_type_list=heads)
+    export.dump_set(name, info, new_all_blocks, heads)
 
     # install
     if install:
@@ -220,12 +232,14 @@ def install_pdf(name):
 
 def generate_block(
     xfxQ2: Callable, xgrid: List[float], sorted_q2grid: List[float], pids: List[int]
-) -> dict:
+) -> LhapdfDataBlock:
     """Generate an LHAPDF data block from a callable."""
-    block: dict = dict(mu2grid=sorted_q2grid, pids=pids, xgrid=xgrid)
+    qgrid = np.sqrt(sorted_q2grid)
     data = []
     for x in xgrid:
         for mu2 in sorted_q2grid:
             data.append(np.array([xfxQ2(pid, x, mu2) for pid in pids]))
-    block["data"] = np.array(data)
-    return block
+    data = np.array(data)
+    return LhapdfDataBlock(
+        xgrid=np.array(xgrid), qgrid=qgrid, pids=np.array(pids), data=data
+    )
diff --git a/src/ekobox/genpdf/export.py b/src/ekobox/genpdf/export.py
index 273046871..3e577d9d4 100644
--- a/src/ekobox/genpdf/export.py
+++ b/src/ekobox/genpdf/export.py
@@ -5,89 +5,9 @@
 import re
 from typing import Optional
 
-import numpy as np
 import yaml
 
-
-def list_to_str(ls, fmt="%.6e"):
-    """Convert a list of numbers to a string.
-
-    Parameters
-    ----------
-    ls : list(float)
-        list
-    fmt : str
-        format string
-
-    Returns
-    -------
-    str :
-        final string
-    """
-    return " ".join([fmt % x for x in ls])
-
-
-def array_to_str(ar):
-    """Convert an array of numbers to a string.
-
-    Parameters
-    ----------
-    ar : list(list(float))
-        array
-
-    Returns
-    -------
-    str :
-        final string
-    """
-    table = ""
-    for line in ar:
-        table += f"{line[0]:.8e} " + list_to_str(line[1:], fmt="%.8e") + "\n"
-    return table
-
-
-def dump_blocks(
-    name: str, member: int, blocks: list[dict], pdf_type: Optional[str] = None
-) -> pathlib.Path:
-    """Write LHAPDF data file.
-
-    Parameters
-    ----------
-    name : str or os.PathLike
-        target name or path
-    member : int
-        PDF member
-    blocks : list(dict)
-        pdf blocks of data
-    pdf_type : str
-        PdfType to be copied in the head of member files
-
-    Returns
-    -------
-        pathlib.Path : target file
-    """
-    path_name = pathlib.Path(name)
-    if path_name.suffix == ".dat":
-        target = path_name
-    else:
-        target = path_name / f"{path_name.stem}_{member:04d}.dat"
-    target.parent.mkdir(exist_ok=True)
-    with open(target, "w", encoding="utf-8") as o:
-        if pdf_type is None:
-            if member == 0:
-                o.write("PdfType: central\n")
-            else:
-                o.write("PdfType: replica\n")
-        else:
-            o.write(pdf_type)
-        o.write("Format: lhagrid1\n---\n")
-        for b in blocks:
-            o.write(list_to_str(b["xgrid"]) + "\n")
-            o.write(list_to_str(list(np.sqrt(b["mu2grid"]))) + "\n")
-            o.write(list_to_str(b["pids"], "%d") + "\n")
-            o.write(array_to_str(b["data"]))
-            o.write("---\n")
-    return target
+from .parser import LhapdfDataFile
 
 
 def dump_info(name: str, info: dict) -> pathlib.Path:
@@ -125,23 +45,35 @@ def dump_info(name: str, info: dict) -> pathlib.Path:
     return target
 
 
-def dump_set(name, info, member_blocks, pdf_type_list=None):
+def dump_set(
+    name: str,
+    info: dict,
+    member_files: list[LhapdfDataFile],
+    header_list: Optional[list[dict[str, str]]] = None,
+) -> None:
     """Dump a whole set.
 
     Parameters
     ----------
-    name : str
+    name :
         target name
-    info : dict
+    info :
         info dictionary
-    member_blocks : list(list(dict))
+    member_files :
         blocks for all members
-    pdf_type : list(str)
-        list of strings to be copied in the head of member files
+    header_list :
+        list of optional additional headers to be copied in the head of member files
     """
     dump_info(name, info)
-    for mem, blocks in enumerate(member_blocks):
-        if not isinstance(pdf_type_list, list) or len(pdf_type_list) == 0:
-            dump_blocks(name, mem, blocks)
+    for mem, f in enumerate(member_files):
+        # update header if necessary
+        if header_list is not None and len(header_list) > 0:
+            f.header.update(header_list[mem])
+        # find path
+        path_name = pathlib.Path(name)
+        if path_name.suffix == ".dat":
+            target = path_name
         else:
-            dump_blocks(name, mem, blocks, pdf_type=pdf_type_list[mem])
+            target = path_name / f"{path_name.stem}_{mem:04d}.dat"
+        target.parent.mkdir(exist_ok=True)
+        f.write(target)
diff --git a/src/ekobox/genpdf/flavors.py b/src/ekobox/genpdf/flavors.py
index d9ee5e6a7..ca269a855 100644
--- a/src/ekobox/genpdf/flavors.py
+++ b/src/ekobox/genpdf/flavors.py
@@ -6,6 +6,8 @@
 
 from eko import basis_rotation as br
 
+from .parser import LhapdfDataBlock, LhapdfDataFile
+
 
 def pid_to_flavor(pids):
     """Create flavor representations from PIDs.
@@ -50,26 +52,27 @@ def evol_to_flavor(labels):
     return np.array(ps)
 
 
-def project(blocks, reprs):
+def project(f: LhapdfDataFile, reprs) -> LhapdfDataFile:
     """Project some combination of flavors defined by reprs from the blocks.
 
     Parameters
     ----------
-    blocks : list(dict)
+    blocks :
         PDF blocks
-    reprs : list(int)
+    reprs :
         active distributions in flavor representation
 
     Returns
     -------
-    list(dict) :
+    LhapdfDataFile :
         filtered blocks
     """
-    new_blocks = copy.deepcopy(blocks)
-    for block in new_blocks:
-        current_pids = block["pids"]
-        current_data = block["data"].T
+    new_blocks = []
+    for block in f.blocks:
+        current_pids = block.pids
+        current_data = block.data.T
         if len(current_data) == 0:
+            new_blocks.append(copy.deepcopy(block))
             continue
         # load all flavors
         flavor_data = [np.zeros_like(current_data[0]) for pid in br.flavor_basis_pids]
@@ -81,9 +84,15 @@ def project(blocks, reprs):
         for elem in reprs:
             proj = elem[:, np.newaxis] * elem
             new_data += proj @ flavor_data / (elem @ elem)
-        block["pids"] = br.flavor_basis_pids
-        block["data"] = np.array(new_data).T
-    return new_blocks
+        new_blocks.append(
+            LhapdfDataBlock(
+                xgrid=block.xgrid,
+                qgrid=block.qgrid,
+                pids=br.flavor_basis_pids,
+                data=np.array(new_data).T,
+            )
+        )
+    return LhapdfDataFile(header=copy.deepcopy(f.header), blocks=new_blocks)
 
 
 def is_evolution_labels(labels):
diff --git a/src/ekobox/genpdf/load.py b/src/ekobox/genpdf/load.py
index fe3ab8860..2e973f1f1 100644
--- a/src/ekobox/genpdf/load.py
+++ b/src/ekobox/genpdf/load.py
@@ -2,9 +2,10 @@
 
 import pathlib
 
-import numpy as np
 import yaml
 
+from .parser import LhapdfDataFile
+
 here = pathlib.Path(__file__).parent
 # Expose the default template
 with open(here / "templatePDF.info", encoding="utf-8") as o:
@@ -13,7 +14,7 @@
     Toy_info = yaml.safe_load(t)
 
 
-def load_info_from_file(pdfset_name):
+def load_info_from_file(pdfset_name: str):
     """Load the info file from a parent pdf.
 
     Parameters
@@ -34,7 +35,7 @@ def load_info_from_file(pdfset_name):
     return info
 
 
-def load_blocks_from_file(pdfset_name, member):
+def load_blocks_from_file(pdfset_name: str, member: int = 0) -> LhapdfDataFile:
     """Load a pdf from a parent pdf.
 
     Parameters
@@ -46,38 +47,10 @@ def load_blocks_from_file(pdfset_name, member):
 
     Returns
     -------
-    str :
-        head of member file
-    list(dict) :
-        pdf blocks of data
+    LhapdfDataFile:
+        data file
     """
     import lhapdf  # pylint: disable=import-error, import-outside-toplevel
 
-    src = pathlib.Path(lhapdf.paths()[0]) / pdfset_name
-    # read actual file
-    cnt = []
-    with open(src / f"{pdfset_name}_{member:04d}.dat", encoding="utf-8") as o:
-        cnt = o.readlines()
-    # file head
-    head = cnt[0]
-    head_section = cnt.index("---\n")
-    blocks = []
-    while head_section < len(cnt) - 1:
-        # section head
-        next_head_section = cnt.index("---\n", head_section + 1)
-        # determine participating pids
-        xgrid = np.array(cnt[head_section + 1].strip().split(" "), dtype=np.float64)
-        Qgrid = np.array(cnt[head_section + 2].strip().split(" "), dtype=np.float64)
-        pids = np.array(cnt[head_section + 3].strip().split(" "), dtype=np.int_)
-        # data
-        data = []
-        for line in cnt[head_section + 4 : next_head_section]:
-            elems = np.fromstring(line.strip(), sep=" ")
-            data.append(elems)
-        mu2grid = [el * el for el in Qgrid]
-        blocks.append(
-            dict(xgrid=xgrid, mu2grid=mu2grid, pids=pids, data=np.array(data))
-        )
-        # iterate
-        head_section = next_head_section
-    return head, blocks
+    src = pathlib.Path(lhapdf.paths()[0])
+    return LhapdfDataFile.read_with_set(src, pdfset_name, member)
diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py
new file mode 100644
index 000000000..202f2e3d5
--- /dev/null
+++ b/src/ekobox/genpdf/parser.py
@@ -0,0 +1,107 @@
+"""LHAPDF data parser."""
+
+import pathlib
+from dataclasses import dataclass
+from io import StringIO
+from typing import Self, Type
+
+import numpy as np
+import numpy.typing as npt
+
+
+@dataclass(frozen=True)
+class LhapdfDataBlock:
+    """LHAPDF data block."""
+
+    xgrid: npt.NDArray[np.float64]
+    """X grid."""
+
+    qgrid: npt.NDArray[np.float64]
+    """Q grid."""
+
+    pids: npt.NDArray[np.int_]
+    """|PID| grid."""
+
+    data: npt.NDArray[np.float64]
+    """Tabulated data."""
+
+    def is_valid(self: Self) -> bool:
+        """Check if dimensions are reasonable."""
+        for a in [self.xgrid, self.qgrid, self.pids]:
+            if len(a.shape) != 1 or len(a) <= 0:
+                return False
+        return self.data.shape == (len(self.xgrid) * len(self.qgrid), len(self.pids))
+
+
+class LhapdfDataFile:
+    """LHAPDF data file."""
+
+    header: dict[str, str]
+    """Header information."""
+
+    blocks: list[LhapdfDataBlock]
+    """Data blocks."""
+
+    def __init__(
+        self: Self, header: dict[str, str], blocks: list[LhapdfDataBlock]
+    ) -> None:
+        self.header = header
+        self.blocks = blocks
+
+    @classmethod
+    def read(cls: Type[Self], path: pathlib.Path) -> Self:
+        """Read from file."""
+        cnt = path.read_text().split("---\n")
+        # header
+        header = {}
+        for line in cnt[0].splitlines():
+            k, v = line.split(":", 1)
+            header[k] = v.strip()
+        # blocks
+        blocks = []
+        for b in cnt[1:]:
+            # skip last if necessary
+            if len(b.strip()) == 0:
+                continue
+            # subdivide
+            lines = b.splitlines()
+            xgrid = np.fromstring(lines[0], np.float64, sep=" ")
+            qgrid = np.fromstring(lines[1], np.float64, sep=" ")
+            pids = np.fromstring(lines[2], np.int_, sep=" ")
+            data = np.loadtxt(StringIO("\n".join(lines[3:])))
+            blocks.append(
+                LhapdfDataBlock(xgrid=xgrid, qgrid=qgrid, pids=pids, data=data)
+            )
+        return cls(header=header, blocks=blocks)
+
+    def write(self: Self, path: pathlib.Path) -> int:
+        """Write to file."""
+        cnt = ""
+        # header
+        for k, v in self.header.items():
+            cnt += f"{k}: {v}\n"
+        if "Format" not in self.header:
+            cnt += "Format: lhagrid1\n"
+        cnt += "---\n"
+        # blocks
+        blocks = []
+        for b in self.blocks:
+            bcnt = StringIO()
+            np.savetxt(bcnt, b.xgrid, "%.8e", newline=" ")
+            bcnt.write("\n")
+            np.savetxt(bcnt, b.qgrid, "%.8e", newline=" ")
+            bcnt.write("\n")
+            np.savetxt(bcnt, b.pids, "%d", newline=" ")
+            bcnt.write("\n")
+            np.savetxt(bcnt, b.data, "%.8e")
+            blocks.append(bcnt.getvalue())
+        cnt += "---\n".join(blocks)
+        cnt += "---\n"
+        return path.write_text(cnt)
+
+    @classmethod
+    def read_with_set(
+        cls: Type[Self], path: pathlib.Path, pdfset: str, member: int = 0
+    ) -> Self:
+        """Read given `member` from given `pdfset` inside `path`."""
+        return cls.read(path / pdfset / f"{pdfset}_{member:04d}.dat")
diff --git a/tests/ekobox/test_evol_pdf.py b/tests/ekobox/test_evol_pdf.py
index baf70726d..a55c1b4bd 100644
--- a/tests/ekobox/test_evol_pdf.py
+++ b/tests/ekobox/test_evol_pdf.py
@@ -119,11 +119,11 @@ def mk(eps):
     # basic
     eps = [(3.0, 3), (4.0, 3)]
     bs = ev_p.collect_blocks(mk(eps), ev_p.regroup_evolgrid(eps), xgrid)
-    assert len(bs) == 1
-    np.testing.assert_allclose(bs[0]["mu2grid"], (3.0, 4.0))
+    assert len(bs.blocks) == 1
+    np.testing.assert_allclose(bs.blocks[0].qgrid ** 2.0, (3.0, 4.0))
     # more advanced
     eps = [(4.0, 3), (3.0, 3), (5.0, 4), (3.0, 4)]
     bs = ev_p.collect_blocks(mk(eps), ev_p.regroup_evolgrid(eps), xgrid)
-    assert len(bs) == 2
-    np.testing.assert_allclose(bs[0]["mu2grid"], (3.0, 4.0))
-    np.testing.assert_allclose(bs[1]["mu2grid"], (3.0, 5.0))
+    assert len(bs.blocks) == 2
+    np.testing.assert_allclose(bs.blocks[0].qgrid ** 2.0, (3.0, 4.0))
+    np.testing.assert_allclose(bs.blocks[1].qgrid ** 2.0, (3.0, 5.0))
diff --git a/tests/ekobox/test_genpdf.py b/tests/ekobox/test_genpdf.py
index a379b2931..87fdac8f4 100644
--- a/tests/ekobox/test_genpdf.py
+++ b/tests/ekobox/test_genpdf.py
@@ -28,8 +28,6 @@ def test_genpdf_exceptions(tmp_path, cd):
                 10,
             )
         # non-existant PDF set
-        with pytest.raises(FileNotFoundError):
-            genpdf.install_pdf("foo")
         with pytest.raises(TypeError):
             genpdf.generate_pdf("debug", [21], info_update=(10, 15, 20))
 
@@ -39,11 +37,9 @@ def test_generate_block():
     mu2s = np.geomspace(1.0, 1e3, 5).tolist()
     pids = np.arange(3)
     b = genpdf.generate_block(lambda pid, x, q2: pid * x * q2, xg, mu2s, pids)
-    assert isinstance(b, dict)
-    assert sorted(b.keys()) == sorted(["data", "mu2grid", "xgrid", "pids"])
-    assert isinstance(b["data"], np.ndarray)
-    assert b["data"].shape == (len(xg) * len(mu2s), len(pids))
-    assert b["mu2grid"] == sorted(b["mu2grid"])
+    assert isinstance(b.data, np.ndarray)
+    assert b.data.shape == (len(xg) * len(mu2s), len(pids))
+    np.testing.assert_allclose(b.qgrid, sorted(b.qgrid))
 
 
 def test_install_pdf(fake_lhapdf, cd):
@@ -100,19 +96,16 @@ def test_generate_pdf_debug_pid(fake_lhapdf, cd):
     ppm = pp / f"{n}_0000.dat"
     assert ppm.exists()
     assert "PdfType: central" in ppm.read_text()
-    head, blocks = genpdf.load.load_blocks_from_file(n, 0)
-    assert "PdfType: central" in head
-    assert len(blocks) == 1
-    b = blocks[0]
-    assert 21 in b["pids"]
-    for k, line in enumerate(b["data"]):
-        for pid, f in zip(b["pids"], line):
+    f = genpdf.load.load_blocks_from_file(n, 0)
+    assert "PdfType" in f.header
+    assert f.header["PdfType"] == "central"
+    assert len(f.blocks) == 1
+    b = f.blocks[0]
+    assert 21 in b.pids
+    for k, line in enumerate(b.data):
+        for pid, f in zip(b.pids, line):
             # the gluon is non-zero in the bulk - x=0 is included here
-            if (
-                pid == 21
-                and k > len(b["mu2grid"]) - 1
-                and k < len(b["data"]) - len(b["mu2grid"])
-            ):
+            if pid == 21 and k > len(b.qgrid) - 1 and k < len(b.data) - len(b.qgrid):
                 assert not f == 0.0
             else:
                 assert f == 0.0
@@ -143,23 +136,25 @@ def test_generate_pdf_pdf_evol(fake_lhapdf, fake_nn31, fake_mstw, fake_ct14, cd)
         for m in range(nmem + 1):
             pm = p / f"{n}_{m:04d}.dat"
             assert pm.exists()
-            head, blocks = genpdf.load.load_blocks_from_file(n, m)
-            assert ("PdfType: central" if m == 0 else f"PdfType: {err_type}") in head
-            assert len(blocks) == nb
-            for b in blocks:
-                for k, line in enumerate(b["data"]):
-                    for pid, f in zip(b["pids"], line):
+            f = genpdf.load.load_blocks_from_file(n, m)
+            assert "PdfType" in f.header
+            if m == 0:
+                assert f.header["PdfType"] == "central"
+            else:
+                assert f.header["PdfType"] == err_type
+            assert len(f.blocks) == nb
+            for b in f.blocks:
+                for k, line in enumerate(b.data):
+                    for pid, f in zip(b.pids, line):
                         # the singlet is non-zero in the bulk - x >= 0
-                        if abs(pid) in range(1, 6 + 1) and k < len(b["data"]) - len(
-                            b["mu2grid"]
+                        if abs(pid) in range(1, 6 + 1) and k < len(b.data) - len(
+                            b.qgrid
                         ):
                             assert not f == 0.0
                             assert not np.isclose(f, 0.0, atol=1e-15)
                         else:
                             # MSTW is not 0 at the end, but only almost
-                            if err_type == "error" and k >= len(b["data"]) - len(
-                                b["mu2grid"]
-                            ):
+                            if err_type == "error" and k >= len(b.data) - len(b.qgrid):
                                 np.testing.assert_allclose(f, 0.0, atol=1e-15)
                             else:
                                 assert f == 0.0
@@ -194,14 +189,15 @@ def test_generate_pdf_toy_antiqed(fake_lhapdf, cd):
     pm = p / f"{n}_0000.dat"
     assert pm.exists()
     assert "PdfType: central" in pm.read_text()
-    head, blocks = genpdf.load.load_blocks_from_file(n, 0)
-    assert "PdfType: central" in head
-    assert len(blocks) == 1
-    b = blocks[0]
-    for k, line in enumerate(b["data"]):
-        for pid, f in zip(b["pids"], line):
+    f = genpdf.load.load_blocks_from_file(n, 0)
+    assert "PdfType" in f.header
+    assert f.header["PdfType"] == "central"
+    assert len(f.blocks) == 1
+    b = f.blocks[0]
+    for k, line in enumerate(b.data):
+        for pid, f in zip(b.pids, line):
             # the u and d are non-zero in the bulk - x=0 is not included here
-            if abs(pid) in [1, 2] and k < len(b["data"]) - len(b["mu2grid"]):
+            if abs(pid) in [1, 2] and k < len(b.data) - len(b.qgrid):
                 assert not f == 0.0
             else:
                 assert f == 0.0
diff --git a/tests/ekobox/test_genpdf_export.py b/tests/ekobox/test_genpdf_export.py
index c87df43b3..e3abf5835 100644
--- a/tests/ekobox/test_genpdf_export.py
+++ b/tests/ekobox/test_genpdf_export.py
@@ -2,24 +2,7 @@
 import yaml
 
 from ekobox import genpdf
-
-
-def test_list_to_str():
-    a = genpdf.export.list_to_str([1, 2])
-    assert isinstance(a, str)
-    assert "1." in a
-    b = genpdf.export.list_to_str([1.0, 2.0])
-    assert isinstance(b, str)
-    assert "1." in a
-
-
-def test_array_to_str():
-    s = (2, 2)
-    a = genpdf.export.array_to_str(np.arange(4).reshape(s))
-    assert isinstance(a, str)
-    assert "1." in a
-    b = np.array([e.split() for e in a.splitlines()])
-    assert b.shape == s
+from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile
 
 
 def test_dump_info(tmp_path):
@@ -50,42 +33,14 @@ def fake_blocks(n_blocks, n_x, n_q2, n_pids):
     bs = []
     for _ in range(n_blocks):
         bs.append(
-            {
-                "xgrid": np.linspace(0.0, 1.0, n_x),
-                "mu2grid": np.geomspace(1.0, 1e3, n_q2),
-                "pids": np.arange(n_pids),
-                "data": np.random.rand(n_x * n_q2, n_pids),
-            }
+            LhapdfDataBlock(
+                xgrid=np.linspace(0.0, 1.0, n_x),
+                qgrid=np.geomspace(1.0, 1e3, n_q2),
+                pids=np.arange(n_pids),
+                data=np.random.rand(n_x * n_q2, n_pids),
+            )
         )
-    return bs
-
-
-def test_dump_blocks(tmp_path):
-    n = "test"
-    p = tmp_path / n
-    nb = 2
-    for m in range(3):
-        f = p / f"{n}_{m:04d}.dat"
-        is_my_type = m > 1
-        pdf_type = "Bla: blub" if is_my_type else None
-        g = genpdf.export.dump_blocks(p, m, fake_blocks(nb, 2, 2, 2), pdf_type=pdf_type)
-        assert p.exists()
-        assert f.exists()
-        assert f == g
-        cnt = f.read_text()
-        if is_my_type:
-            assert "Bla: blub" in cnt
-        else:
-            assert ("central" in cnt) == (m == 0)
-        assert "Format" in cnt
-        assert cnt.count("---") == nb + 1
-
-
-def test_dump_blocks_to_file(tmp_path):
-    f = tmp_path / "mem.dat"
-    g = genpdf.export.dump_blocks(f, 0, fake_blocks(2, 2, 2, 2))
-    assert f.exists()
-    assert f == g
+    return LhapdfDataFile(header={}, blocks=bs)
 
 
 def test_dump_set(tmp_path):
@@ -93,7 +48,7 @@ def test_dump_set(tmp_path):
     p = tmp_path / n
     i = {"a": "b", "c": 2}
     nmem = 2
-    for pdf_type_list in (None, ["Bla: Blub"] * nmem):
+    for pdf_type_list in (None, [{"Bla": "Blub"}] * nmem):
         genpdf.export.dump_set(
             p, i, [fake_blocks(2, 2, 2, 2) for _ in range(nmem)], pdf_type_list
         )
diff --git a/tests/ekobox/test_genpdf_flavors.py b/tests/ekobox/test_genpdf_flavors.py
index e1f3d695e..3c3cea0d5 100644
--- a/tests/ekobox/test_genpdf_flavors.py
+++ b/tests/ekobox/test_genpdf_flavors.py
@@ -1,6 +1,7 @@
 import numpy as np
 
 from ekobox import genpdf
+from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile
 
 
 def test_is_evolution():
@@ -34,53 +35,61 @@ def test_flavors_evol_to_flavor():
 
 
 def test_flavors_evol_raw():
-    blocks = [
-        {
-            "mu2grid": np.array([1, 2]),
-            "xgrid": np.array([0.1, 1.0]),
-            "pids": np.array([-1, 21, 1]),
-            "data": np.array([[0.1, 0.2, 0.1]] * 4),
-        }
-    ]
-    gonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["g"]))
-    assert len(gonly) == 1
+    f = LhapdfDataFile(
+        header={},
+        blocks=[
+            LhapdfDataBlock(
+                xgrid=np.array([0.1, 1.0]),
+                qgrid=np.array([1, 2]),
+                pids=np.array([-1, 21, 1]),
+                data=np.array([[0.1, 0.2, 0.1]] * 4),
+            )
+        ],
+    )
+    gonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["g"]))
+    assert len(gonly.blocks) == 1
     np.testing.assert_allclose(
-        gonly[0]["data"],
+        gonly.blocks[0].data,
         np.array(
             [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 4
         ),
     )
-    Sonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["S"]))
-    assert len(Sonly) == 1
+    Sonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["S"]))
+    assert len(Sonly.blocks) == 1
     for i in [0, 1, 2, 3]:
         # g and gamma are zero
-        np.testing.assert_allclose(Sonly[0]["data"][i][7], 0)
-        np.testing.assert_allclose(Sonly[0]["data"][i][0], 0)
+        np.testing.assert_allclose(Sonly.blocks[0].data[i][7], 0)
+        np.testing.assert_allclose(Sonly.blocks[0].data[i][0], 0)
         # quark are all equal and equal to anti-quarks
         for pid in [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13]:
-            np.testing.assert_allclose(Sonly[0]["data"][i][pid], Sonly[0]["data"][i][1])
+            np.testing.assert_allclose(
+                Sonly.blocks[0].data[i][pid], Sonly.blocks[0].data[i][1]
+            )
 
 
 def test_flavors_evol_nodata():
     # try with a block without data
-    blocks = [
-        {
-            "mu2grid": np.array([1, 2]),
-            "xgrid": np.array([0.1, 1.0]),
-            "pids": np.array([-1, 21, 1]),
-            "data": np.array([]),
-        },
-        {
-            "mu2grid": np.array([1, 2]),
-            "xgrid": np.array([0.1, 1.0]),
-            "pids": np.array([-1, 21, 1]),
-            "data": np.array([[0.1, 0.2, 0.1]] * 4),
-        },
-    ]
-    gonly = genpdf.flavors.project(blocks, genpdf.flavors.evol_to_flavor(["g"]))
-    assert len(gonly) == 2
+    f = LhapdfDataFile(
+        header={},
+        blocks=[
+            LhapdfDataBlock(
+                xgrid=np.array([0.1, 1.0]),
+                qgrid=np.array([1, 2]),
+                pids=np.array([-1, 21, 1]),
+                data=np.array([]),
+            ),
+            LhapdfDataBlock(
+                xgrid=np.array([0.1, 1.0]),
+                qgrid=np.array([1, 2]),
+                pids=np.array([-1, 21, 1]),
+                data=np.array([[0.1, 0.2, 0.1]] * 4),
+            ),
+        ],
+    )
+    gonly = genpdf.flavors.project(f, genpdf.flavors.evol_to_flavor(["g"]))
+    assert len(gonly.blocks) == 2
     np.testing.assert_allclose(
-        gonly[1]["data"],
+        gonly.blocks[1].data,
         np.array(
             [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 4
         ),
diff --git a/tests/ekobox/test_genpdf_load.py b/tests/ekobox/test_genpdf_load.py
index aab51cc17..4e648bf87 100644
--- a/tests/ekobox/test_genpdf_load.py
+++ b/tests/ekobox/test_genpdf_load.py
@@ -1,5 +1,3 @@
-import numpy as np
-
 from ekobox import genpdf
 
 
@@ -11,11 +9,5 @@ def test_load_info(fake_ct14):
 
 
 def test_load_data_ct14(fake_ct14):
-    blocks = genpdf.load.load_blocks_from_file(fake_ct14, 0)[1]
-    assert len(blocks) == 1
-    b0 = blocks[0]
-    assert isinstance(b0, dict)
-    assert sorted(b0.keys()) == sorted(["pids", "xgrid", "mu2grid", "data"])
-    assert sorted(b0["pids"]) == sorted([-3, -2, -1, 21, 1, 2, 3])
-    assert len(b0["data"].T) == 7
-    np.testing.assert_allclose(b0["xgrid"][0], 1e-9)
+    f = genpdf.load.load_blocks_from_file(fake_ct14, 0)
+    assert len(f.blocks) == 1
diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py
new file mode 100644
index 000000000..76968026f
--- /dev/null
+++ b/tests/ekobox/test_genpdf_parser.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+from ekobox.genpdf.parser import LhapdfDataFile
+
+
+def test_genpdf_parser_ct14(fake_lhapdf, fake_ct14, tmp_path):
+    # read
+    f = LhapdfDataFile.read_with_set(fake_lhapdf, fake_ct14)
+    assert "PdfType" in f.header
+    assert f.header["PdfType"] == "central"
+    assert len(f.blocks) == 1
+    b0 = f.blocks[0]
+    assert b0.is_valid()
+    assert sorted(b0.pids) == sorted([-3, -2, -1, 21, 1, 2, 3])
+    assert len(b0.data.T) == 7
+    np.testing.assert_allclose(b0.xgrid[0], 1e-9)
+    # write
+    t = tmp_path / "blub.dat"
+    f.write(t)
+    # read back
+    g = LhapdfDataFile.read(t)
+    assert g.header == f.header
+    assert len(g.blocks) == len(f.blocks)
+    c0 = g.blocks[0]
+    np.testing.assert_allclose(c0.xgrid, b0.xgrid)
+    np.testing.assert_allclose(c0.qgrid, b0.qgrid)
+    np.testing.assert_allclose(c0.pids, b0.pids)
+    np.testing.assert_allclose(c0.data, b0.data)
+
+
+def test_genpdf_parser_mstw(fake_lhapdf, fake_mstw):
+    # read
+    f0 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_mstw, 0)
+    assert "PdfType" in f0.header
+    assert f0.header["PdfType"] == "central"
+    assert len(f0.blocks) == 3
+    for b in f0.blocks:
+        assert b.is_valid()
+    b0 = f0.blocks[0]
+    assert sorted(b0.pids) == sorted([-5, -4, -3, -2, -1, 21, 1, 2, 3, 4, 5])
+    assert len(b0.data.T) == 11
+    np.testing.assert_allclose(b0.xgrid[0], 1e-6)
+    # check also first member
+    f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_mstw, 1)
+    assert "PdfType" in f1.header
+    assert f1.header["PdfType"] == "error"
+
+
+def test_genpdf_parser_nn31(fake_lhapdf, fake_nn31):
+    # read
+    f0 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 0)
+    assert "PdfType" in f0.header
+    assert f0.header["PdfType"] == "central"
+    assert len(f0.blocks) == 2
+    for b in f0.blocks:
+        assert b.is_valid()
+    b0 = f0.blocks[0]
+    assert sorted(b0.pids) == sorted([-5, -4, -3, -2, -1, 21, 1, 2, 3, 4, 5])
+    assert len(b0.data.T) == 11
+    np.testing.assert_allclose(b0.xgrid[0], 1e-9)
+    # check also first member
+    f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 1)
+    assert "PdfType" in f1.header
+    assert f1.header["PdfType"] == "replica"

From ecabc2b725f470a910f20c86ca843d7daca67467 Mon Sep 17 00:00:00 2001
From: Felix Hekhorn <felix.a.hekhorn@jyu.fi>
Date: Wed, 8 Apr 2026 15:40:59 +0300
Subject: [PATCH 2/5] Drop Self type due to py3.10

---
 src/ekobox/genpdf/parser.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py
index 202f2e3d5..efca09007 100644
--- a/src/ekobox/genpdf/parser.py
+++ b/src/ekobox/genpdf/parser.py
@@ -3,7 +3,6 @@
 import pathlib
 from dataclasses import dataclass
 from io import StringIO
-from typing import Self, Type
 
 import numpy as np
 import numpy.typing as npt
@@ -25,7 +24,7 @@ class LhapdfDataBlock:
     data: npt.NDArray[np.float64]
     """Tabulated data."""
 
-    def is_valid(self: Self) -> bool:
+    def is_valid(self) -> bool:
         """Check if dimensions are reasonable."""
         for a in [self.xgrid, self.qgrid, self.pids]:
             if len(a.shape) != 1 or len(a) <= 0:
@@ -42,14 +41,12 @@ class LhapdfDataFile:
     blocks: list[LhapdfDataBlock]
     """Data blocks."""
 
-    def __init__(
-        self: Self, header: dict[str, str], blocks: list[LhapdfDataBlock]
-    ) -> None:
+    def __init__(self, header: dict[str, str], blocks: list[LhapdfDataBlock]) -> None:
         self.header = header
         self.blocks = blocks
 
     @classmethod
-    def read(cls: Type[Self], path: pathlib.Path) -> Self:
+    def read(cls, path: pathlib.Path):
         """Read from file."""
         cnt = path.read_text().split("---\n")
         # header
@@ -74,7 +71,7 @@ def read(cls: Type[Self], path: pathlib.Path) -> Self:
             )
         return cls(header=header, blocks=blocks)
 
-    def write(self: Self, path: pathlib.Path) -> int:
+    def write(self, path: pathlib.Path) -> int:
         """Write to file."""
         cnt = ""
         # header
@@ -100,8 +97,6 @@ def write(self: Self, path: pathlib.Path) -> int:
         return path.write_text(cnt)
 
     @classmethod
-    def read_with_set(
-        cls: Type[Self], path: pathlib.Path, pdfset: str, member: int = 0
-    ) -> Self:
+    def read_with_set(cls, path: pathlib.Path, pdfset: str, member: int = 0):
         """Read given `member` from given `pdfset` inside `path`."""
         return cls.read(path / pdfset / f"{pdfset}_{member:04d}.dat")

From 307fcfe8ffac61182cecede060358f76e768fd20 Mon Sep 17 00:00:00 2001
From: Felix Hekhorn <felix.a.hekhorn@jyu.fi>
Date: Wed, 8 Apr 2026 15:45:52 +0300
Subject: [PATCH 3/5] Add Changelog entry

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb4e3aab7..fe04e96ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,9 @@ Items without prefix refer to a global change.
 
 ## [Unreleased](https://github.com/NNPDF/eko/compare/v0.15.3...HEAD)
 
+### Changed
+- py: Add LHAPDF parser to `genpdf` (soft breaking change) ([#506](https://github.com/NNPDF/eko/pull/506))
+
 ## [0.15.3](https://github.com/NNPDF/eko/compare/v0.15.2...v0.15.3) - 2026-02-05
 
 ### Added

From 880d5fca252376847e6f68959309e167f4e4ef22 Mon Sep 17 00:00:00 2001
From: Felix Hekhorn <felix.a.hekhorn@jyu.fi>
Date: Fri, 10 Apr 2026 17:11:54 +0300
Subject: [PATCH 4/5] Add LhapdfBlock.add

---
 src/ekobox/genpdf/parser.py        | 22 ++++++++++++
 tests/ekobox/test_genpdf_parser.py | 55 +++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py
index efca09007..44706d58a 100644
--- a/src/ekobox/genpdf/parser.py
+++ b/src/ekobox/genpdf/parser.py
@@ -31,6 +31,28 @@ def is_valid(self) -> bool:
                 return False
         return self.data.shape == (len(self.xgrid) * len(self.qgrid), len(self.pids))
 
+    def add(self, other):
+        """Add other block."""
+        # x and q have to be the same
+        np.testing.assert_allclose(self.xgrid, other.xgrid)
+        np.testing.assert_allclose(self.qgrid, other.qgrid)
+        # PID we can recover
+        tot_pids = np.unique(np.concatenate([self.pids, other.pids]))
+        tot_data = []
+        for pid in tot_pids:
+            if pid in self.pids and pid in other.pids:
+                tot_data.append(
+                    self.data[:, self.pids.searchsorted(pid)].copy()
+                    + other.data[:, other.pids.searchsorted(pid)].copy()
+                )
+            elif pid in self.pids:
+                tot_data.append(self.data[:, self.pids.searchsorted(pid)].copy())
+            elif pid in other.pids:
+                tot_data.append(other.data[:, other.pids.searchsorted(pid)].copy())
+        return LhapdfDataBlock(
+            xgrid=self.xgrid, qgrid=self.qgrid, pids=tot_pids, data=np.array(tot_data).T
+        )
+
 
 class LhapdfDataFile:
     """LHAPDF data file."""
diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py
index 76968026f..6caf64057 100644
--- a/tests/ekobox/test_genpdf_parser.py
+++ b/tests/ekobox/test_genpdf_parser.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from ekobox.genpdf.parser import LhapdfDataFile
+from ekobox.genpdf.parser import LhapdfDataBlock, LhapdfDataFile
 
 
 def test_genpdf_parser_ct14(fake_lhapdf, fake_ct14, tmp_path):
@@ -62,3 +62,56 @@ def test_genpdf_parser_nn31(fake_lhapdf, fake_nn31):
     f1 = LhapdfDataFile.read_with_set(fake_lhapdf, fake_nn31, 1)
     assert "PdfType" in f1.header
     assert f1.header["PdfType"] == "replica"
+
+
+def test_genpdf_parser_block_add():
+    a = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([1]),
+        data=np.array([[1.0], [-1.0]]),
+    )
+    b = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([1]),
+        data=np.array([[2.0], [1.0]]),
+    )
+    c = a.add(b)
+    np.testing.assert_allclose(c.xgrid, a.xgrid)
+    np.testing.assert_allclose(c.qgrid, a.qgrid)
+    np.testing.assert_allclose(c.pids, a.pids)
+    np.testing.assert_allclose(c.data, np.array([[3.0], [0.0]]))
+    b = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([2]),
+        data=np.array([[2.0], [1.0]]),
+    )
+    c = a.add(b)
+    np.testing.assert_allclose(c.xgrid, a.xgrid)
+    np.testing.assert_allclose(c.qgrid, a.qgrid)
+    np.testing.assert_allclose(c.pids, np.array([1, 2]))
+    np.testing.assert_allclose(c.data, np.array([[1.0, 2.0], [-1.0, 1.0]]))
+    b = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([2]),
+        data=np.array([[2.0], [1.0]]),
+    )
+    c = a.add(b)
+    np.testing.assert_allclose(c.xgrid, a.xgrid)
+    np.testing.assert_allclose(c.qgrid, a.qgrid)
+    np.testing.assert_allclose(c.pids, np.array([1, 2]))
+    np.testing.assert_allclose(c.data, np.array([[1.0, 2.0], [-1.0, 1.0]]))
+    b = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([1, 2]),
+        data=np.array([[3.0, 2.0], [4.0, 1.0]]),
+    )
+    c = a.add(b)
+    np.testing.assert_allclose(c.xgrid, a.xgrid)
+    np.testing.assert_allclose(c.qgrid, a.qgrid)
+    np.testing.assert_allclose(c.pids, np.array([1, 2]))
+    np.testing.assert_allclose(c.data, np.array([[4.0, 2.0], [3.0, 1.0]]))

From e3bc71d9dda320a57cea3c035a997c8fad7f0070 Mon Sep 17 00:00:00 2001
From: Felix Hekhorn <felix.a.hekhorn@jyu.fi>
Date: Mon, 13 Apr 2026 11:23:37 +0300
Subject: [PATCH 5/5] Fix LhapdfBlock.add

---
 src/ekobox/genpdf/parser.py        |  8 ++++----
 tests/ekobox/test_genpdf_parser.py | 17 +++++++++++++++++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/ekobox/genpdf/parser.py b/src/ekobox/genpdf/parser.py
index 44706d58a..8d8d6b20c 100644
--- a/src/ekobox/genpdf/parser.py
+++ b/src/ekobox/genpdf/parser.py
@@ -42,13 +42,13 @@ def add(self, other):
         for pid in tot_pids:
             if pid in self.pids and pid in other.pids:
                 tot_data.append(
-                    self.data[:, self.pids.searchsorted(pid)].copy()
-                    + other.data[:, other.pids.searchsorted(pid)].copy()
+                    self.data[:, self.pids.tolist().index(pid)].copy()
+                    + other.data[:, other.pids.tolist().index(pid)].copy()
                 )
             elif pid in self.pids:
-                tot_data.append(self.data[:, self.pids.searchsorted(pid)].copy())
+                tot_data.append(self.data[:, self.pids.tolist().index(pid)].copy())
             elif pid in other.pids:
-                tot_data.append(other.data[:, other.pids.searchsorted(pid)].copy())
+                tot_data.append(other.data[:, other.pids.tolist().index(pid)].copy())
         return LhapdfDataBlock(
             xgrid=self.xgrid, qgrid=self.qgrid, pids=tot_pids, data=np.array(tot_data).T
         )
diff --git a/tests/ekobox/test_genpdf_parser.py b/tests/ekobox/test_genpdf_parser.py
index 6caf64057..05aa2887e 100644
--- a/tests/ekobox/test_genpdf_parser.py
+++ b/tests/ekobox/test_genpdf_parser.py
@@ -115,3 +115,20 @@ def test_genpdf_parser_block_add():
     np.testing.assert_allclose(c.qgrid, a.qgrid)
     np.testing.assert_allclose(c.pids, np.array([1, 2]))
     np.testing.assert_allclose(c.data, np.array([[4.0, 2.0], [3.0, 1.0]]))
+    a = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([1, -1]),
+        data=np.array([[1.0, 2.0], [3.0, 4.0]]),
+    )
+    b = LhapdfDataBlock(
+        xgrid=np.array([0.5]),
+        qgrid=np.array([10.0]),
+        pids=np.array([-1, 1]),
+        data=np.array([[20.0, 10.0], [40.0, 30.0]]),
+    )
+    c = a.add(b)
+    np.testing.assert_allclose(c.xgrid, a.xgrid)
+    np.testing.assert_allclose(c.qgrid, a.qgrid)
+    np.testing.assert_allclose(c.pids, np.array([-1, 1]))
+    np.testing.assert_allclose(c.data, np.array([[22.0, 11.0], [44.0, 33.0]]))