From eae43eac1b19c12be60656c6960df8a36a87a2eb Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Tue, 28 Apr 2026 14:26:52 +0100
Subject: [PATCH 01/10] Update md5 with stored covmat

---
 n3fit/src/n3fit/scripts/vp_setupfit.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py
index 91fa2e7001..01e7e67af0 100644
--- a/n3fit/src/n3fit/scripts/vp_setupfit.py
+++ b/n3fit/src/n3fit/scripts/vp_setupfit.py
@@ -66,6 +66,7 @@
 TABLE_OUTPUT_FOLDER = "tables"
 MD5_FILENAME = "md5"
 INPUT_FOLDER = "input"
+COVMAT_TABLE_FILENAME = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv"
 
 
 class SetupFitError(Exception):
@@ -113,13 +114,24 @@ def init_output(self):
         self.input_folder.mkdir(exist_ok=True)
 
     def save_md5(self):
-        """Generate md5 key from file"""
+        """Generate md5 key from the runcard and the stored fitting covmat table."""
         output_filename = self.output_path / MD5_FILENAME
+        covmat_path = self.output_path / TABLE_OUTPUT_FOLDER / COVMAT_TABLE_FILENAME
+        if not covmat_path.exists():
+            raise SetupFitError(
+                f"Cannot compute md5: expected fitting covmat table not found at {covmat_path}."
+            )
+
+        hash_md5 = hashlib.md5()
         with open(self.config_yml, 'rb') as f:
-            hash_md5 = hashlib.md5(f.read()).hexdigest()
+            hash_md5.update(f.read())
+        with open(covmat_path, 'rb') as f:
+            hash_md5.update(f.read())
+
+        digest = hash_md5.hexdigest()
         with open(output_filename, 'w') as g:
-            g.write(hash_md5)
-        log.info(f"md5 {hash_md5} stored in {output_filename}")
+            g.write(digest)
+        log.info(f"md5 {digest} stored in {output_filename}")
 
     @classmethod
     def ns_dump_description(cls):

From a4dbef2a7eab8ad5241cb1f9ccc4cf93c3f7549e Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Tue, 5 May 2026 09:24:06 +0200
Subject: [PATCH 02/10] Check md5 integrity against vp-setupfit in n3fit

---
 n3fit/src/n3fit/scripts/n3fit_exec.py  | 35 ++++++++++++++++++++++++++
 n3fit/src/n3fit/scripts/vp_setupfit.py | 32 +++++++++++++----------
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py
index c77fbbeae0..50ef4d9d0b 100755
--- a/n3fit/src/n3fit/scripts/n3fit_exec.py
+++ b/n3fit/src/n3fit/scripts/n3fit_exec.py
@@ -13,6 +13,7 @@
 import pandas as pd
 from ruamel.yaml import error
 
+from n3fit.scripts.vp_setupfit import MD5_FILENAME, SetupFitError, _compute_fit_md5
 from reportengine import colors
 from reportengine.namespaces import NSList
 from validphys.api import API
@@ -71,6 +72,11 @@ def init_output(self):
 
         # check if results folder exists
         self.output_path = pathlib.Path(self.output_path).absolute()
+
+        # Verify vp-setupfit hash is consistent with the current fit configuration
+        # before we touch anything in the output folder.
+        self._verify_setupfit_md5()
+
         if not (self.output_path / "nnfit").is_dir():
             if not re.fullmatch(r"[\w.\-]+", self.output_path.name):
                 raise N3FitError("Invalid output folder name. Must be alphanumeric.")
@@ -103,6 +109,29 @@ def init_output(self):
         self.input_folder = self.replica_path / INPUT_FOLDER
         self.input_folder.mkdir(exist_ok=True)
 
+    def _verify_setupfit_md5(self):
+        if getattr(self, "skip_md5_check", False):
+            log.warning("Skipping md5 check against vp-setupfit (--skip-md5-check is set).")
+            return
+
+        md5_path = self.output_path / MD5_FILENAME
+        if not md5_path.exists():
+            raise N3FitError(
+                f"No {MD5_FILENAME} file not found at {md5_path}. "
+                "Run vp-setupfit on this runcard before n3fit."
+            )
+        stored = md5_path.read_text().strip()
+        try:
+            current = _compute_fit_md5(self.config_yml, self.output_path)
+        except SetupFitError as e:
+            raise N3FitError(f"Failed to compute current fit md5: {e}") from e
+        if stored != current:
+            raise N3FitError(
+                f"md5 mismatch in {self.output_path}: stored {stored} != current {current}. "
+                "The runcard changed since vp-setupfit was run. Re-run vp-setupfit (or pass "
+                "--skip-md5-check to override this check if this is the desired behaviour)."
+            )
+
     @classmethod
     def ns_dump_description(cls):
         return {
@@ -312,6 +341,11 @@ def check_positive(value):
             help="End of the range of replicas to compute",
             type=check_positive,
         )
+        parser.add_argument(
+            "--skip-md5-check",
+            help="Skip the integrity check against the md5 written by vp-setupfit.",
+            action="store_true",
+        )
         return parser
 
     def get_commandline_arguments(self, cmdline=None):
@@ -341,6 +375,7 @@ def run(self):
             self.environment.db_host = self.args["db_host"]
             self.environment.db_port = self.args["db_port"]
             self.environment.db_name = self.args["db_name"]
+            self.environment.skip_md5_check = self.args["skip_md5_check"]
             super().run()
         except N3FitError as e:
             log.error(f"Error in n3fit:\n{e}")
diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py
index 01e7e67af0..0ff47de1f2 100644
--- a/n3fit/src/n3fit/scripts/vp_setupfit.py
+++ b/n3fit/src/n3fit/scripts/vp_setupfit.py
@@ -69,6 +69,24 @@
 COVMAT_TABLE_FILENAME = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv"
 
 
+def _compute_fit_md5(config_yaml: pathlib.Path, output_path: pathlib.Path) -> str:
+    """Compute the md5 hash of the fit when vp-setupfit is run. This utility function
+    is also used in n3fit to check that the fit configuration has not changed
+    since the setup-fit was run."""
+    covmat_path = output_path / TABLE_OUTPUT_FOLDER / COVMAT_TABLE_FILENAME
+    if not covmat_path.exists():
+        raise SetupFitError(
+            f"Cannot compute md5: expected fitting covmat table not found at {covmat_path}."
+        )
+    hash_md5 = hashlib.md5()
+    with open(config_yaml, 'rb') as f:
+        hash_md5.update(f.read())
+    with open(covmat_path, 'rb') as f:
+        hash_md5.update(f.read())
+    digest = hash_md5.hexdigest()
+    return digest
+
+
 class SetupFitError(Exception):
     """Exception raised when setup-fit cannot succeed and knows why"""
 
@@ -116,19 +134,7 @@ def init_output(self):
     def save_md5(self):
         """Generate md5 key from the runcard and the stored fitting covmat table."""
         output_filename = self.output_path / MD5_FILENAME
-        covmat_path = self.output_path / TABLE_OUTPUT_FOLDER / COVMAT_TABLE_FILENAME
-        if not covmat_path.exists():
-            raise SetupFitError(
-                f"Cannot compute md5: expected fitting covmat table not found at {covmat_path}."
-            )
-
-        hash_md5 = hashlib.md5()
-        with open(self.config_yml, 'rb') as f:
-            hash_md5.update(f.read())
-        with open(covmat_path, 'rb') as f:
-            hash_md5.update(f.read())
-
-        digest = hash_md5.hexdigest()
+        digest = _compute_fit_md5(self.config_yml, self.output_path)
         with open(output_filename, 'w') as g:
             g.write(digest)
         log.info(f"md5 {digest} stored in {output_filename}")

From db531c4984172980928c303b65873a80ac5ef45e Mon Sep 17 00:00:00 2001
From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com>
Date: Thu, 7 May 2026 13:27:23 +0200
Subject: [PATCH 03/10] Apply review's comments

---
 n3fit/src/n3fit/scripts/n3fit_exec.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py
index 50ef4d9d0b..2c149a71a0 100755
--- a/n3fit/src/n3fit/scripts/n3fit_exec.py
+++ b/n3fit/src/n3fit/scripts/n3fit_exec.py
@@ -110,14 +110,14 @@ def init_output(self):
         self.input_folder.mkdir(exist_ok=True)
 
     def _verify_setupfit_md5(self):
-        if getattr(self, "skip_md5_check", False):
+        if sekf.skip_md5_check:
             log.warning("Skipping md5 check against vp-setupfit (--skip-md5-check is set).")
             return
 
         md5_path = self.output_path / MD5_FILENAME
         if not md5_path.exists():
             raise N3FitError(
-                f"No {MD5_FILENAME} file not found at {md5_path}. "
+                f"{MD5_FILENAME} file not found at {md5_path}. "
                 "Run vp-setupfit on this runcard before n3fit."
             )
         stored = md5_path.read_text().strip()
@@ -345,6 +345,7 @@ def check_positive(value):
             "--skip-md5-check",
             help="Skip the integrity check against the md5 written by vp-setupfit.",
             action="store_true",
+            default=False,
         )
         return parser
 

From 357952a47015a0b367e8e603722a56ebe0d5d29a Mon Sep 17 00:00:00 2001
From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com>
Date: Thu, 7 May 2026 13:30:14 +0200
Subject: [PATCH 04/10] Remove hash for covmat

---
 n3fit/src/n3fit/scripts/vp_setupfit.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py
index 0ff47de1f2..8ed0d8766a 100644
--- a/n3fit/src/n3fit/scripts/vp_setupfit.py
+++ b/n3fit/src/n3fit/scripts/vp_setupfit.py
@@ -66,23 +66,15 @@
 TABLE_OUTPUT_FOLDER = "tables"
 MD5_FILENAME = "md5"
 INPUT_FOLDER = "input"
-COVMAT_TABLE_FILENAME = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv"
 
 
 def _compute_fit_md5(config_yaml: pathlib.Path, output_path: pathlib.Path) -> str:
     """Compute the md5 hash of the fit when vp-setupfit is run. This utility function
     is also used in n3fit to check that the fit configuration has not changed
     since the setup-fit was run."""
-    covmat_path = output_path / TABLE_OUTPUT_FOLDER / COVMAT_TABLE_FILENAME
-    if not covmat_path.exists():
-        raise SetupFitError(
-            f"Cannot compute md5: expected fitting covmat table not found at {covmat_path}."
-        )
     hash_md5 = hashlib.md5()
     with open(config_yaml, 'rb') as f:
         hash_md5.update(f.read())
-    with open(covmat_path, 'rb') as f:
-        hash_md5.update(f.read())
     digest = hash_md5.hexdigest()
     return digest
 

From e3dc290ca75b05b691e630420644f4e21ec94d4e Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 14:37:59 +0200
Subject: [PATCH 05/10] Fix typo

---
 n3fit/src/n3fit/scripts/n3fit_exec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py
index 2c149a71a0..d8b6c408ac 100755
--- a/n3fit/src/n3fit/scripts/n3fit_exec.py
+++ b/n3fit/src/n3fit/scripts/n3fit_exec.py
@@ -110,7 +110,7 @@ def init_output(self):
         self.input_folder.mkdir(exist_ok=True)
 
     def _verify_setupfit_md5(self):
-        if sekf.skip_md5_check:
+        if self.skip_md5_check:
             log.warning("Skipping md5 check against vp-setupfit (--skip-md5-check is set).")
             return
 

From 35d436e01df110021d5c021a6559643d08997ae3 Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 16:11:58 +0200
Subject: [PATCH 06/10] Update tests so vp-setupfit is run before n3fit

---
 n3fit/src/n3fit/tests/test_fit.py      | 33 +++++++++++++++++++-------
 n3fit/src/n3fit/tests/test_hyperopt.py | 28 +++++++++++++---------
 2 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py
index f844bad3a6..c7f10e3faf 100644
--- a/n3fit/src/n3fit/tests/test_fit.py
+++ b/n3fit/src/n3fit/tests/test_fit.py
@@ -24,6 +24,7 @@
 
 import n3fit
 from n3fit.io.writer import SuperEncoder
+from n3fit.tests.helpers import run_n3fit, run_setupfit
 from validphys.n3fit_data import replica_mcseed, replica_nnseed, replica_trvlseed
 from validphys.utils import yaml_safe
 
@@ -160,7 +161,7 @@ def _auxiliary_performfit(tmp_path, runcard=QUICKNAME, replica=1, timing=True, r
     shutil.copy(quickpath, tmp_path)
     shutil.copy(weightpath, tmp_path / f"{weight_name}.weights.h5")
     # run the fit
-    sp.run(f"{EXE} {quickcard} {replica}".split(), cwd=tmp_path, check=True)
+    run_n3fit(quickcard, str(replica), cwd=tmp_path, check=True)
 
     # And compare
     check_fit_results(tmp_path, runcard, replica, old_json_file, timing=timing, rel_error=rel_error)
@@ -185,7 +186,7 @@ def test_performfit_with_old_theory(tmp_path):
     quickcard = "quickcard_old.yml"
     quickpath = REGRESSION_FOLDER / quickcard
     shutil.copy(quickpath, tmp_path)
-    sp.run(f"{EXE} {quickcard} 5".split(), cwd=tmp_path, check=True)
+    run_n3fit(quickcard, "5", cwd=tmp_path, check=True)
 
 
 @pytest.mark.skip(reason="Still not implemented in parallel mode")
@@ -198,7 +199,7 @@ def test_hyperopt(tmp_path):
     # We just want to ensure that the hyperopt can run, but we need to kill it ourselves
     # 60 seconds should be enough
     with pytest.raises(sp.TimeoutExpired):
-        sp.run(f"{EXE} {quickcard} {REPLICA} --hyperopt 1000".split(), cwd=tmp_path, timeout=60)
+        run_n3fit(quickcard, f"{REPLICA} --hyperopt 1000", cwd=tmp_path, timeout=60)
 
 
 def test_novalidation(tmp_path, timing=30):
@@ -209,7 +210,7 @@ def test_novalidation(tmp_path, timing=30):
     quickpath = REGRESSION_FOLDER / quickcard
     shutil.copy(quickpath, tmp_path)
     with pytest.raises(sp.TimeoutExpired):
-        sp.run(f"{EXE} {quickcard} {REPLICA}".split(), cwd=tmp_path, timeout=timing)
+        run_n3fit(quickcard, REPLICA, cwd=tmp_path, timeout=timing)
 
 
 def test_weirdbasis(tmp_path, timing=30):
@@ -225,7 +226,7 @@ def test_weirdbasis(tmp_path, timing=30):
     shutil.copy(quickpath, tmp_path)
     #     with pytest.raises(sp.TimeoutExpired):
     with pytest.raises(sp.CalledProcessError):
-        sp.run(f"{EXE} {quickcard} {REPLICA}".split(), cwd=tmp_path, timeout=timing, check=True)
+        run_n3fit(quickcard, REPLICA, cwd=tmp_path, timeout=timing, check=True)
 
 
 @pytest.mark.linux
@@ -239,7 +240,7 @@ def test_multireplica_runs(tmp_path, runcard):
         path = tmp_path / name
         path.mkdir()
         shutil.copy(quickpath, path)
-        sp.run(f"{EXE} {quickcard} {replicas}".split(), cwd=path, check=True)
+        run_n3fit(quickcard, replicas, cwd=path, check=True)
 
     for name_1, option_1 in options.items():
         for name_2, option_2 in options.items():
@@ -299,8 +300,8 @@ def test_parallel_against_sequential(tmp_path, rep_from=6, rep_to=8):
 
     # Now run both
     for r in range(rep_from, rep_to + 1):
-        sp.run(f"{EXE} {card_sequenti} {r}".split(), cwd=tmp_path, check=True)
-    sp.run(f"{EXE} {card_parallel} {rep_from} -r {rep_to}".split(), cwd=tmp_path, check=True)
+        run_n3fit(card_sequenti, str(r), cwd=tmp_path, setupfit=(r == rep_from), check=True)
+    run_n3fit(card_parallel, f"{rep_from} -r {rep_to}", cwd=tmp_path, check=True)
 
     # Loop over all pseudodata files for both fits and load them up
     folder_seq = card_sequenti.with_suffix("") / "nnfit"
@@ -335,3 +336,19 @@ def compare_weights(option_1, option_2, file_1, file_2):
             weight_name = file_1[key].name
             err_msg = f"Difference between runs `n3fit {option_1}` and `n3fit {option_2}` in weights {weight_name}"
             assert_allclose(file_1[key][:], file_2[key][:], rtol=1e-5, atol=1e-5, err_msg=err_msg)
+
+
+def test_md5_mismatch_is_detected(tmp_path):
+    """vp-setupfit, then tamper with the runcard -> n3fit must refuse to start."""
+    quickcard = f"{QUICKNAME}.yml"
+    weight_name = "weights_pol" if "_pol" in quickcard else "weights"
+    weightpath = REGRESSION_FOLDER / f"{weight_name}_1.weights.h5"
+    shutil.copy(REGRESSION_FOLDER / quickcard, tmp_path)
+    shutil.copy(weightpath, tmp_path / f"{weight_name}.weights.h5")
+    run_setupfit(quickcard, cwd=tmp_path)
+
+    runcard_path = tmp_path / quickcard
+    runcard_path.write_text(runcard_path.read_text() + "\n# tampered\n")
+
+    with pytest.raises(sp.CalledProcessError):
+        run_n3fit(quickcard, REPLICA, cwd=tmp_path, setupfit=False, check=True)
diff --git a/n3fit/src/n3fit/tests/test_hyperopt.py b/n3fit/src/n3fit/tests/test_hyperopt.py
index bbae97ddd4..ad4a83f595 100644
--- a/n3fit/src/n3fit/tests/test_hyperopt.py
+++ b/n3fit/src/n3fit/tests/test_hyperopt.py
@@ -15,6 +15,7 @@
 
 from n3fit.hyper_optimization.rewards import HyperLoss
 from n3fit.model_gen import ReplicaSettings, generate_pdf_model
+from n3fit.tests.helpers import run_n3fit, run_setupfit
 from n3fit.vpinterface import N3PDF
 from validphys.loader import Loader
 from validphys.tests.conftest import THEORYID
@@ -139,20 +140,22 @@ def test_restart_from_pickle(tmp_path):
     # cp runcard to tmp folder
     shutil.copy(quickpath, tmp_path)
     # run some trials for the first time
-    sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} -o {output_restart}".split(),
+    run_n3fit(
+        quickpath,
+        f"{REPLICA} --hyperopt {n_trials_stop} -o {output_restart}",
         cwd=tmp_path,
         check=True,
     )
-    # restart the hyperopt and calculate more trials
-    sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_total} -o {output_restart}".split(),
+    run_n3fit(
+        quickpath,
+        f"{REPLICA} --hyperopt {n_trials_total} -o {output_restart}",
         cwd=tmp_path,
+        setupfit=False,
         check=True,
-    )
-    # start again and calculate all trials at once
-    sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_total} -o {output_direct}".split(),
+    )  # md5 already in output_restart
+    run_n3fit(
+        quickpath,
+        f"{REPLICA} --hyperopt {n_trials_total} -o {output_direct}",
         cwd=tmp_path,
         check=True,
     )
@@ -193,13 +196,15 @@ def test_parallel_hyperopt(tmp_path):
     shutil.copy(quickpath, tmp_path)
 
     # Run hyperopt sequentially
-    sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials} -o {output_sequential}".split(),
+    run_n3fit(
+        quickpath,
+        f"{REPLICA} --hyperopt {n_trials} -o {output_sequential}",
         cwd=tmp_path,
         check=True,
     )
 
     # Run hyperopt in parallel
+    run_setupfit(quickpath, cwd=tmp_path, output=output_parallel)
     workers = []
     my_env = os.environ.copy()
     my_env["CUDA_VISIBLE_DEVICES"] = ""
@@ -248,6 +253,7 @@ def test_parallel_restart(tmp_path):
 
     # cp runcard to tmp folder
     shutil.copy(quickpath, tmp_path)
+    run_setupfit(quickpath, cwd=tmp_path, output=output)
     # run some trials for the first time
     for i in range(n_workers):
         tmp = sp.Popen(

From d6b0f50acf589b9b93df3b007bdd9750623e3048 Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 16:12:10 +0200
Subject: [PATCH 07/10] Update doc

---
 doc/sphinx/source/tutorials/closuretest.rst | 10 +++---
 doc/sphinx/source/tutorials/run-fit.rst     | 36 ++++++++++++++++++---
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/doc/sphinx/source/tutorials/closuretest.rst b/doc/sphinx/source/tutorials/closuretest.rst
index e5dbe8649d..097f7afcb5 100644
--- a/doc/sphinx/source/tutorials/closuretest.rst
+++ b/doc/sphinx/source/tutorials/closuretest.rst
@@ -159,11 +159,11 @@ Running a closure test with ``n3fit``
 
 Running a closure test with ``n3fit`` will require a valid ``n3fit`` runcard, with
 the closure test settings modified as shown
-:ref:`above <prep_ct_runcard>`. The difference
-between running a closure fit in ``n3fit`` and a standard fit is that the user is
-required to run ``vp-setupfit`` on the runcard before running ``n3fit``. This is
-because the filtering of the data is required to generate the pseudodata central
-values. The workflow is as follows:
+:ref:`above <prep_ct_runcard>`. As with any standard fit, the user is required
+to run ``vp-setupfit`` on the runcard before running ``n3fit`` (see
+:ref:`run-n3fit-fit`); for a closure test this step is additionally responsible
+for filtering the data and generating the pseudodata central values. The
+workflow is as follows:
 
 .. code:: bash
 
diff --git a/doc/sphinx/source/tutorials/run-fit.rst b/doc/sphinx/source/tutorials/run-fit.rst
index 7f11965739..f372efdd78 100644
--- a/doc/sphinx/source/tutorials/run-fit.rst
+++ b/doc/sphinx/source/tutorials/run-fit.rst
@@ -75,10 +75,13 @@ Running the fitting code
 After successfully installing the ``n3fit`` package and preparing a runcard
 following the points presented above you can proceed with a fit.
 
-1.  Prepare the fit using ``vp-setupfit``. This command will generate a
-    folder with the same name as the runcard (minus the file extension) in the
-    current directory, which will contain a copy of the original YAML runcard.
-    The required resources will be downloaded, which includes:
+1.  Prepare the fit using ``vp-setupfit``. **This step is mandatory**: it
+    produces resources that ``n3fit`` requires in order to run.
+
+    The command creates a folder with the same name as the runcard (minus the
+    file extension) in the current directory, which will contain a copy of the
+    original YAML runcard. The required resources will be downloaded, which
+    includes:
 
       - The t0 PDF set (an LHAPDF object).
       - The FastKernel tables in the form of a ``theory_xxx.tgz`` file
@@ -87,6 +90,10 @@ following the points presented above you can proceed with a fit.
     If the runcard requires to precompute some heavy objects shared among replicas,
     such as the theory covariance matrix, it will be done during this step.
 
+    A file named ``md5`` is also written inside the output folder. It contains a
+    hash of the runcard which ``n3fit`` uses to detect changes made to the
+    runcard between ``vp-setupfit`` and ``n3fit`` (see step 2).
+
 ::
 
   vp-setupfit <runcard>.yml
@@ -97,6 +104,27 @@ following the points presented above you can proceed with a fit.
     replica fit you should launch more than 100 replicas (e.g. 120) since not all of them will necessarily converge.
     While by default the code runs each replica separately, it is possible to run many replicas in parallel, see :ref:`parallel-label`.
 
+    Before training begins, ``n3fit`` recomputes the md5 hash of the runcard
+    and compares it against the value written by ``vp-setupfit`` in step 1.
+    If the two do not match — i.e. the runcard was modified after
+    ``vp-setupfit`` was run — ``n3fit`` aborts with an error. The fix is to
+    re-run ``vp-setupfit`` so that the md5 (and any heavy resources affected
+    by the change, such as the covariance matrix) are regenerated from the
+    current runcard.
+
+    .. warning::
+
+       The md5 check can be bypassed by passing ``--skip-md5-check`` to
+       ``n3fit``::
+
+          n3fit <runcard>.yml <replica> --skip-md5-check
+
+       Use this flag at your own risk: it disables the only safeguard
+       against running ``n3fit`` on a runcard that has diverged from the
+       one ``vp-setupfit`` was given. Resources cached during ``vp-setupfit``
+       (covariance matrices, t0 PDF, theory tables) may no longer be
+       consistent with the current runcard.
+
 ::
 
   for i in {1..120} ; do

From c71270f9b301729ae7907ce81237f1b8bb3bf904 Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 16:14:59 +0200
Subject: [PATCH 08/10] Add helper module

---
 n3fit/src/n3fit/tests/helpers.py | 44 ++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 n3fit/src/n3fit/tests/helpers.py

diff --git a/n3fit/src/n3fit/tests/helpers.py b/n3fit/src/n3fit/tests/helpers.py
new file mode 100644
index 0000000000..c2cb37c826
--- /dev/null
+++ b/n3fit/src/n3fit/tests/helpers.py
@@ -0,0 +1,44 @@
+"""
+Test helpers for the n3fit regression suite.
+
+n3fit verifies an md5 written by vp-setupfit (see
+``N3FitEnvironment._verify_setupfit_md5``), so any test that invokes n3fit
+on a runcard must run vp-setupfit first. ``run_n3fit`` wraps that pair so
+individual tests don't have to.
+"""
+
+import subprocess as sp
+
+
+def run_setupfit(runcard, *, cwd, output=None):
+    """Run ``vp-setupfit <runcard> [-o <output>]`` before ``n3fit``.
+
+    Exposed separately so tests that launch n3fit via ``sp.Popen`` (e.g. the
+    parallel-hyperopt tests) can run the setupfit step once before forking
+    workers.
+    """
+    cmd = ["vp-setupfit", str(runcard)]
+    if output is not None:
+        cmd += ["-o", str(output)]
+    return sp.run(cmd, cwd=cwd, check=True)
+
+
+def run_n3fit(runcard, args="", *, cwd, setupfit=True, **run_kwargs):
+    """Run ``vp-setupfit`` and then ``n3fit`` on the same runcard.
+
+    If ``-o <output>`` appears in ``args`` it's forwarded to vp-setupfit so
+    the md5 lands in the directory n3fit will read it from.
+
+    Set ``setupfit=False`` when this call shares both the runcard *and* the
+    output directory (default or ``-o``) with an earlier call that already
+    ran vp-setupfit — the md5 written by that call is still valid.
+    """
+    args_list = args.split() if isinstance(args, str) else list(args)
+
+    if setupfit:
+        output = None
+        if "-o" in args_list:
+            output = args_list[args_list.index("-o") + 1]
+        run_setupfit(runcard, cwd=cwd, output=output)
+
+    return sp.run(["n3fit", str(runcard), *args_list], cwd=cwd, **run_kwargs)

From c7ac72f97f3baa793768d7386e42945984d14d89 Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 16:21:12 +0200
Subject: [PATCH 09/10] Update regression_checks

---
 extra_tests/regression_checks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extra_tests/regression_checks.py b/extra_tests/regression_checks.py
index 22b4e97bf0..b3f038ff41 100644
--- a/extra_tests/regression_checks.py
+++ b/extra_tests/regression_checks.py
@@ -8,6 +8,7 @@
 
 import pytest
 
+from n3fit.tests.helpers import run_n3fit
 from n3fit.tests.test_fit import EXE, check_fit_results
 from validphys.utils import yaml_safe
 
@@ -44,7 +45,7 @@ def test_regression_fit(tmp_path, runcard, replica, regenerate):
     if (wname := runcard_info.get("load")) is not None:
         shutil.copy(REGRESSION_FOLDER / wname, tmp_path)
 
-    sp.run(f"{EXE} {runcard_name} {replica}".split(), cwd=tmp_path, check=True)
+    run_n3fit(runcard_name, f"{replica}", cwd=tmp_path, check=True)
     old_json_file = REGRESSION_FOLDER / f"{runcard}_{replica}.json"
 
     check_fit_results(

From a16ec3153ff242b77098ae58eedbc009c9801e43 Mon Sep 17 00:00:00 2001
From: achiefa <amedeochiefa@gmail.com>
Date: Thu, 7 May 2026 16:29:05 +0200
Subject: [PATCH 10/10] Update pytorch and jax tests to run vp-setupfit before
 n3fit

---
 .github/workflows/all_tests_nnpdf.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/all_tests_nnpdf.yml b/.github/workflows/all_tests_nnpdf.yml
index f37bb5640d..f61f4764c1 100644
--- a/.github/workflows/all_tests_nnpdf.yml
+++ b/.github/workflows/all_tests_nnpdf.yml
@@ -110,6 +110,7 @@ jobs:
       shell: bash -l {0}
       run: |
         cd n3fit/runcards/examples
+        vp-setupfit Basic_runcard.yml
         n3fit Basic_runcard.yml 4
         cat Basic_runcard/nnfit/*/Basic_runcard.json
     - name: Test we can still run postfit
@@ -139,6 +140,7 @@ jobs:
       shell: bash -l {0}
       run: |
         cd n3fit/runcards/examples
+        vp-setupfit Basic_runcard.yml
         n3fit Basic_runcard.yml 42
         cat Basic_runcard/nnfit/*/Basic_runcard.json