From ec0f49711ee392ff71e25c73380bd7f2b6f8f445 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 30 Nov 2022 17:29:59 +0100 Subject: [PATCH 1/3] when the trvl would leave always a full dataset out, get one point with probability frac, fixes #1634 --- .../n3fit/tests/regressions/quickcard_1.json | 56 ++++++------ .../n3fit/tests/regressions/quickcard_2.json | 88 +++++++++---------- validphys2/src/validphys/n3fit_data.py | 10 ++- validphys2/src/validphys/tests/conftest.py | 6 +- 4 files changed, 83 insertions(+), 77 deletions(-) diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_1.json b/n3fit/src/n3fit/tests/regressions/quickcard_1.json index 75cb9f5b75..8a88752155 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_1.json +++ b/n3fit/src/n3fit/tests/regressions/quickcard_1.json @@ -20,8 +20,8 @@ }, { "fl": "v3", - "smallx": 0.30309754610061646, - "largex": 2.6213722229003906, + "smallx": 0.3030211925506592, + "largex": 2.621304512023926, "trainable": true }, { @@ -51,45 +51,45 @@ ], "stop_epoch": 1000, "best_epoch": 999, - "erf_tr": 38.022315979003906, - "erf_vl": 31.481945037841797, - "chi2": 22.705814361572266, + "erf_tr": 39.057010650634766, + "erf_vl": 29.909780502319336, + "chi2": 22.705690383911133, "pos_state": "POS_VETO", "arc_lengths": [ - 1.103731936457393, - 1.505681943929697, - 1.2027105035135508, - 1.426451562695196, - 1.1901165080869252 + 1.103742133208361, + 1.5056703837007153, + 1.2027024122156913, + 1.4182762310320054, + 1.190134533856863 ], "integrability": [ - 0.002630652510560605, - 0.0026306525105581624, - 0.00013975353567441395, - 3.6658432483673087, - 0.004028247582030353 + 0.002630715302072484, + 0.002630715302073705, + 0.00013720474453138287, + 3.665958881378174, + 0.004028137103890828 ], "timing": { "walltime": { - "Total": 12.020201921463013, + "Total": 22.385475635528564, "start": 0.0, - "replica_set": 0.003970146179199219, - "replica_fitted": 12.02015495300293, - "replica_set_to_replica_fitted": 12.01618480682373 + "replica_set": 0.025483131408691406, + "replica_fitted": 22.385230779647827, + "replica_set_to_replica_fitted": 22.359747648239136 }, "cputime": { - "Total": 13.138117000000001, + "Total": 27.547072900000003, "start": 0.0, - "replica_set": 0.029436999999999713, - "replica_fitted": 13.138062999999999, - "replica_set_to_replica_fitted": 13.108626 + "replica_set": 1.4402541279999994, + "replica_fitted": 27.546810807999996, + "replica_set_to_replica_fitted": 26.106556679999997 } }, "version": { - "keras": "2.4.0", - "tensorflow": "2.4.1, mkl=False", - "numpy": "1.21.2", - "nnpdf": "4.0.4.221+gc94f7414b-dev", - "validphys": "4.0.4.221+gc94f7414b-dev" + "keras": "2.7.0", + "tensorflow": "2.7.0, mkl=False", + "numpy": "1.21.5", + "nnpdf": "4.0.5.295+g34eba1986-dev", + "validphys": "4.0.5.295+g34eba1986-dev" } } \ No newline at end of file diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_2.json b/n3fit/src/n3fit/tests/regressions/quickcard_2.json index c84bc93421..44e9473333 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_2.json +++ b/n3fit/src/n3fit/tests/regressions/quickcard_2.json @@ -2,94 +2,94 @@ "preprocessing": [ { "fl": "sng", - "smallx": 1.1001018285751343, - "largex": 2.6947875022888184, + "smallx": 1.099926233291626, + "largex": 2.694965362548828, "trainable": true }, { "fl": "g", - "smallx": 0.9447894096374512, - "largex": 1.6324347257614136, + "smallx": 0.9449679851531982, + "largex": 1.6326133012771606, "trainable": true }, { "fl": "v", - "smallx": 0.7402390837669373, - "largex": 1.7163281440734863, + "smallx": 0.7404175400733948, + "largex": 1.7165066003799438, "trainable": true }, { "fl": "v3", - "smallx": 0.2104424387216568, - "largex": 1.3539683818817139, + "smallx": 0.21026656031608582, + "largex": 1.3537929058074951, "trainable": true }, { "fl": "v8", "smallx": 0.7599998712539673, - "largex": 2.40140700340271, + "largex": 2.401231050491333, "trainable": true }, { "fl": "t3", - "smallx": 1.4329228401184082, - "largex": 2.284966230392456, + "smallx": 1.4331011772155762, + "largex": 2.2847883701324463, "trainable": true }, { "fl": "t8", - "smallx": 1.0496479272842407, - "largex": 1.7641903162002563, + "smallx": 1.0498263835906982, + "largex": 1.7643688917160034, "trainable": true }, { "fl": "cp", - "smallx": 0.23785176873207092, - "largex": 2.786799430847168, + "smallx": 0.23803016543388367, + "largex": 2.786621570587158, "trainable": true } ], - "stop_epoch": 313, - "best_epoch": 11, - "erf_tr": 3.859008312225342, - "erf_vl": 3.781097650527954, - "chi2": 2.1904516220092773, + "stop_epoch": 302, + "best_epoch": 0, + "erf_tr": 3.74715518951416, + "erf_vl": 3.685098171234131, + "chi2": 2.187250852584839, "pos_state": "POS_PASS", "arc_lengths": [ - 1.9062157860541882, - 1.1675613091142847, - 1.1346750403691772, - 1.5211630991705873, - 1.099993511588776 + 1.9042910513554256, + 1.1674365527313044, + 1.1344623143618382, + 1.5301224176894808, + 1.1008704792768709 ], "integrability": [ - 0.025609320495277865, - 0.025609320495276755, - 2.8907650913900795e-05, - 16.2582106590271, - 0.028889004141091856 + 0.02566919755190339, + 0.025669197551905665, + 2.9297336424871645e-05, + 16.31229305267334, + 0.028826652094720906 ], "timing": { "walltime": { - "Total": 7.219811916351318, + "Total": 13.346972227096558, "start": 0.0, - "replica_set": 0.00436091423034668, - "replica_fitted": 7.219769716262817, - "replica_set_to_replica_fitted": 7.215408802032471 + "replica_set": 0.005179166793823242, + "replica_fitted": 13.346774339675903, + "replica_set_to_replica_fitted": 13.34159517288208 }, "cputime": { - "Total": 8.290757, + "Total": 18.004598894000004, "start": 0.0, - "replica_set": 0.03159199999999984, - "replica_fitted": 8.290709999999999, - "replica_set_to_replica_fitted": 8.259117999999999 + "replica_set": 0.29500279599999857, + "replica_fitted": 18.004358473000003, + "replica_set_to_replica_fitted": 17.709355677000005 } }, "version": { - "keras": "2.4.0", - "tensorflow": "2.4.1, mkl=False", - "numpy": "1.21.2", - "nnpdf": "4.0.4.220+g894138a9a", - "validphys": "4.0.4.220+g894138a9a" + "keras": "2.7.0", + "tensorflow": "2.7.0, mkl=False", + "numpy": "1.21.5", + "nnpdf": "4.0.5.295+g34eba1986-dev", + "validphys": "4.0.5.295+g34eba1986-dev" } -} +} \ No newline at end of file diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 9a8633e7d9..ed92a37b33 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -84,7 +84,7 @@ def tr_masks(data, replica_trvlseed): nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed # TODO: update this to new random infrastructure. - np.random.seed(nameseed) + rng = np.random.default_rng(nameseed) trmask_partial = [] for dataset in data.datasets: # TODO: python commondata will not require this rubbish. @@ -92,11 +92,15 @@ def tr_masks(data, replica_trvlseed): cuts = dataset.cuts ndata = len(cuts.load()) if cuts else dataset.commondata.ndata frac = dataset.frac - trmax = int(frac * ndata) + # We do this so that a given dataset will always have the same number of points masked + trmax = int(ndata*frac) + if trmax == 0: + # If that number is 0, then get 1 point with probability frac + trmax = int(rng.random() < frac) mask = np.concatenate( [np.ones(trmax, dtype=np.bool), np.zeros(ndata - trmax, dtype=np.bool)] ) - np.random.shuffle(mask) + rng.shuffle(mask) trmask_partial.append(mask) return _TrMasks(str(data), replica_trvlseed, trmask_partial) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index 5bde85e172..74eb4315c3 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -50,9 +50,11 @@ def tmp(tmpdir): HESSIAN_PDF = "NNPDF40_nnlo_as_01180_hessian" THEORYID = 162 FIT = "NNPDF40_nnlo_lowprecision" -FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec" +FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec_221130" FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated" -PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_220330" +PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_221130" + + base_config = dict( pdf=PDF, From 0165cba986d69b9db53454b1c770a40ff40eb0b9 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Thu, 1 Dec 2022 09:09:34 +0100 Subject: [PATCH 2/3] Update fitbot.yml after the change of the trvl mask in #1636 the fitbot changes and so it must be updated --- .github/workflows/fitbot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fitbot.yml b/.github/workflows/fitbot.yml index 5af0f6e33c..17f8162877 100644 --- a/.github/workflows/fitbot.yml +++ b/.github/workflows/fitbot.yml @@ -10,7 +10,7 @@ on: env: N3FIT_MAXNREP: 20 # total number of replicas to fit POSTFIT_NREP: 16 # requested replicas for postfit - REFERENCE_SET: NNBOT-ecb18dcb3-2022-09-21 # reference set for vp + REFERENCE_SET: NNBOT-90875c07e-2022-11-30 # reference set for vp CONDA_PY: 39 jobs: From 9b845d3a97f0543147f8ed01a14c32944f2a7dce Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 1 Dec 2022 11:19:44 +0100 Subject: [PATCH 3/3] explicitly select a rng from numpy --- validphys2/src/validphys/n3fit_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index ed92a37b33..3c4cc78095 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -84,7 +84,7 @@ def tr_masks(data, replica_trvlseed): nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed # TODO: update this to new random infrastructure. - rng = np.random.default_rng(nameseed) + rng = np.random.Generator(np.random.PCG64(nameseed)) trmask_partial = [] for dataset in data.datasets: # TODO: python commondata will not require this rubbish.