From ec0f49711ee392ff71e25c73380bd7f2b6f8f445 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Wed, 30 Nov 2022 17:29:59 +0100
Subject: [PATCH 1/3] when the trvl mask would always leave a full dataset out,
 get one point with probability frac, fixes #1634

---
 .../n3fit/tests/regressions/quickcard_1.json  | 56 ++++++------
 .../n3fit/tests/regressions/quickcard_2.json  | 88 +++++++++----------
 validphys2/src/validphys/n3fit_data.py        | 10 ++-
 validphys2/src/validphys/tests/conftest.py    |  6 +-
 4 files changed, 83 insertions(+), 77 deletions(-)

diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_1.json b/n3fit/src/n3fit/tests/regressions/quickcard_1.json
index 75cb9f5b75..8a88752155 100644
--- a/n3fit/src/n3fit/tests/regressions/quickcard_1.json
+++ b/n3fit/src/n3fit/tests/regressions/quickcard_1.json
@@ -20,8 +20,8 @@
     },
     {
       "fl": "v3",
-      "smallx": 0.30309754610061646,
-      "largex": 2.6213722229003906,
+      "smallx": 0.3030211925506592,
+      "largex": 2.621304512023926,
       "trainable": true
     },
     {
@@ -51,45 +51,45 @@
   ],
   "stop_epoch": 1000,
   "best_epoch": 999,
-  "erf_tr": 38.022315979003906,
-  "erf_vl": 31.481945037841797,
-  "chi2": 22.705814361572266,
+  "erf_tr": 39.057010650634766,
+  "erf_vl": 29.909780502319336,
+  "chi2": 22.705690383911133,
   "pos_state": "POS_VETO",
   "arc_lengths": [
-    1.103731936457393,
-    1.505681943929697,
-    1.2027105035135508,
-    1.426451562695196,
-    1.1901165080869252
+    1.103742133208361,
+    1.5056703837007153,
+    1.2027024122156913,
+    1.4182762310320054,
+    1.190134533856863
   ],
   "integrability": [
-    0.002630652510560605,
-    0.0026306525105581624,
-    0.00013975353567441395,
-    3.6658432483673087,
-    0.004028247582030353
+    0.002630715302072484,
+    0.002630715302073705,
+    0.00013720474453138287,
+    3.665958881378174,
+    0.004028137103890828
   ],
   "timing": {
     "walltime": {
-      "Total": 12.020201921463013,
+      "Total": 22.385475635528564,
       "start": 0.0,
-      "replica_set": 0.003970146179199219,
-      "replica_fitted": 12.02015495300293,
-      "replica_set_to_replica_fitted": 12.01618480682373
+      "replica_set": 0.025483131408691406,
+      "replica_fitted": 22.385230779647827,
+      "replica_set_to_replica_fitted": 22.359747648239136
     },
     "cputime": {
-      "Total": 13.138117000000001,
+      "Total": 27.547072900000003,
       "start": 0.0,
-      "replica_set": 0.029436999999999713,
-      "replica_fitted": 13.138062999999999,
-      "replica_set_to_replica_fitted": 13.108626
+      "replica_set": 1.4402541279999994,
+      "replica_fitted": 27.546810807999996,
+      "replica_set_to_replica_fitted": 26.106556679999997
     }
   },
   "version": {
-    "keras": "2.4.0",
-    "tensorflow": "2.4.1, mkl=False",
-    "numpy": "1.21.2",
-    "nnpdf": "4.0.4.221+gc94f7414b-dev",
-    "validphys": "4.0.4.221+gc94f7414b-dev"
+    "keras": "2.7.0",
+    "tensorflow": "2.7.0, mkl=False",
+    "numpy": "1.21.5",
+    "nnpdf": "4.0.5.295+g34eba1986-dev",
+    "validphys": "4.0.5.295+g34eba1986-dev"
   }
 }
\ No newline at end of file
diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_2.json b/n3fit/src/n3fit/tests/regressions/quickcard_2.json
index c84bc93421..44e9473333 100644
--- a/n3fit/src/n3fit/tests/regressions/quickcard_2.json
+++ b/n3fit/src/n3fit/tests/regressions/quickcard_2.json
@@ -2,94 +2,94 @@
   "preprocessing": [
     {
       "fl": "sng",
-      "smallx": 1.1001018285751343,
-      "largex": 2.6947875022888184,
+      "smallx": 1.099926233291626,
+      "largex": 2.694965362548828,
       "trainable": true
     },
     {
       "fl": "g",
-      "smallx": 0.9447894096374512,
-      "largex": 1.6324347257614136,
+      "smallx": 0.9449679851531982,
+      "largex": 1.6326133012771606,
       "trainable": true
     },
     {
       "fl": "v",
-      "smallx": 0.7402390837669373,
-      "largex": 1.7163281440734863,
+      "smallx": 0.7404175400733948,
+      "largex": 1.7165066003799438,
       "trainable": true
     },
     {
       "fl": "v3",
-      "smallx": 0.2104424387216568,
-      "largex": 1.3539683818817139,
+      "smallx": 0.21026656031608582,
+      "largex": 1.3537929058074951,
       "trainable": true
     },
     {
       "fl": "v8",
       "smallx": 0.7599998712539673,
-      "largex": 2.40140700340271,
+      "largex": 2.401231050491333,
       "trainable": true
     },
     {
       "fl": "t3",
-      "smallx": 1.4329228401184082,
-      "largex": 2.284966230392456,
+      "smallx": 1.4331011772155762,
+      "largex": 2.2847883701324463,
       "trainable": true
     },
     {
       "fl": "t8",
-      "smallx": 1.0496479272842407,
-      "largex": 1.7641903162002563,
+      "smallx": 1.0498263835906982,
+      "largex": 1.7643688917160034,
       "trainable": true
     },
     {
       "fl": "cp",
-      "smallx": 0.23785176873207092,
-      "largex": 2.786799430847168,
+      "smallx": 0.23803016543388367,
+      "largex": 2.786621570587158,
       "trainable": true
     }
   ],
-  "stop_epoch": 313,
-  "best_epoch": 11,
-  "erf_tr": 3.859008312225342,
-  "erf_vl": 3.781097650527954,
-  "chi2": 2.1904516220092773,
+  "stop_epoch": 302,
+  "best_epoch": 0,
+  "erf_tr": 3.74715518951416,
+  "erf_vl": 3.685098171234131,
+  "chi2": 2.187250852584839,
   "pos_state": "POS_PASS",
   "arc_lengths": [
-    1.9062157860541882,
-    1.1675613091142847,
-    1.1346750403691772,
-    1.5211630991705873,
-    1.099993511588776
+    1.9042910513554256,
+    1.1674365527313044,
+    1.1344623143618382,
+    1.5301224176894808,
+    1.1008704792768709
   ],
   "integrability": [
-    0.025609320495277865,
-    0.025609320495276755,
-    2.8907650913900795e-05,
-    16.2582106590271,
-    0.028889004141091856
+    0.02566919755190339,
+    0.025669197551905665,
+    2.9297336424871645e-05,
+    16.31229305267334,
+    0.028826652094720906
   ],
   "timing": {
     "walltime": {
-      "Total": 7.219811916351318,
+      "Total": 13.346972227096558,
       "start": 0.0,
-      "replica_set": 0.00436091423034668,
-      "replica_fitted": 7.219769716262817,
-      "replica_set_to_replica_fitted": 7.215408802032471
+      "replica_set": 0.005179166793823242,
+      "replica_fitted": 13.346774339675903,
+      "replica_set_to_replica_fitted": 13.34159517288208
     },
     "cputime": {
-      "Total": 8.290757,
+      "Total": 18.004598894000004,
       "start": 0.0,
-      "replica_set": 0.03159199999999984,
-      "replica_fitted": 8.290709999999999,
-      "replica_set_to_replica_fitted": 8.259117999999999
+      "replica_set": 0.29500279599999857,
+      "replica_fitted": 18.004358473000003,
+      "replica_set_to_replica_fitted": 17.709355677000005
     }
   },
   "version": {
-    "keras": "2.4.0",
-    "tensorflow": "2.4.1, mkl=False",
-    "numpy": "1.21.2",
-    "nnpdf": "4.0.4.220+g894138a9a",
-    "validphys": "4.0.4.220+g894138a9a"
+    "keras": "2.7.0",
+    "tensorflow": "2.7.0, mkl=False",
+    "numpy": "1.21.5",
+    "nnpdf": "4.0.5.295+g34eba1986-dev",
+    "validphys": "4.0.5.295+g34eba1986-dev"
   }
-}
+}
\ No newline at end of file
diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index 9a8633e7d9..ed92a37b33 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -84,7 +84,7 @@ def tr_masks(data, replica_trvlseed):
     nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8
     nameseed += replica_trvlseed
     # TODO: update this to new random infrastructure.
-    np.random.seed(nameseed)
+    rng = np.random.default_rng(nameseed)
     trmask_partial = []
     for dataset in data.datasets:
         # TODO: python commondata will not require this rubbish.
@@ -92,11 +92,15 @@ def tr_masks(data, replica_trvlseed):
         cuts = dataset.cuts
         ndata = len(cuts.load()) if cuts else dataset.commondata.ndata
         frac = dataset.frac
-        trmax = int(frac * ndata)
+        # We do this so that a given dataset will always have the same number of points masked
+        trmax = int(ndata*frac)
+        if trmax == 0:
+            # If that number is 0, then get 1 point with probability frac
+            trmax = int(rng.random() < frac)
         mask = np.concatenate(
             [np.ones(trmax, dtype=np.bool), np.zeros(ndata - trmax, dtype=np.bool)]
         )
-        np.random.shuffle(mask)
+        rng.shuffle(mask)
         trmask_partial.append(mask)
     return _TrMasks(str(data), replica_trvlseed, trmask_partial)
 
diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py
index 5bde85e172..74eb4315c3 100644
--- a/validphys2/src/validphys/tests/conftest.py
+++ b/validphys2/src/validphys/tests/conftest.py
@@ -50,9 +50,11 @@ def tmp(tmpdir):
 HESSIAN_PDF = "NNPDF40_nnlo_as_01180_hessian"
 THEORYID = 162
 FIT = "NNPDF40_nnlo_lowprecision"
-FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec"
+FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec_221130"
 FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated"
-PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_220330"
+PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_221130"
+
+
 
 base_config = dict(
         pdf=PDF,

From 0165cba986d69b9db53454b1c770a40ff40eb0b9 Mon Sep 17 00:00:00 2001
From: "Juan M. Cruz-Martinez" <juacrumar@lairen.eu>
Date: Thu, 1 Dec 2022 09:09:34 +0100
Subject: [PATCH 2/3] Update fitbot.yml

After the change to the trvl mask in #1636 the fitbot results change, so the reference set must be updated.
---
 .github/workflows/fitbot.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/fitbot.yml b/.github/workflows/fitbot.yml
index 5af0f6e33c..17f8162877 100644
--- a/.github/workflows/fitbot.yml
+++ b/.github/workflows/fitbot.yml
@@ -10,7 +10,7 @@ on:
 env:
   N3FIT_MAXNREP: 20 # total number of replicas to fit
   POSTFIT_NREP: 16 # requested replicas for postfit
-  REFERENCE_SET: NNBOT-ecb18dcb3-2022-09-21 # reference set for vp
+  REFERENCE_SET: NNBOT-90875c07e-2022-11-30 # reference set for vp
   CONDA_PY: 39
 
 jobs:

From 9b845d3a97f0543147f8ed01a14c32944f2a7dce Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Thu, 1 Dec 2022 11:19:44 +0100
Subject: [PATCH 3/3] explicitly select a rng from numpy

---
 validphys2/src/validphys/n3fit_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index ed92a37b33..3c4cc78095 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -84,7 +84,7 @@ def tr_masks(data, replica_trvlseed):
     nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8
     nameseed += replica_trvlseed
     # TODO: update this to new random infrastructure.
-    rng = np.random.default_rng(nameseed)
+    rng = np.random.Generator(np.random.PCG64(nameseed))
     trmask_partial = []
     for dataset in data.datasets:
         # TODO: python commondata will not require this rubbish.