diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 2beeb514a9..b0141b0f9c 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -11,6 +11,7 @@ from reportengine.table import table from validphys.calcutils import regularize_covmat, get_df_block +from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.checks import ( check_dataset_cuts_match_theorycovmat, check_norm_threshold, @@ -66,6 +67,7 @@ def covmat_from_systematics(loaded_commondata_with_cuts, _central_values=None): Parameters ---------- + loaded_commondata_with_cuts : validphys.coredata.CommonData CommonData which stores information about systematic errors, their treatment and description. @@ -78,9 +80,9 @@ def covmat_from_systematics(loaded_commondata_with_cuts, _central_values=None): Returns ------- - cov_mat : np.array - Numpy array which is N_dat x N_dat (where N_dat is the number of data - points after cuts) containing uncertainty and correlation information. + cov_mat: np.array + Numpy array which is N_dat x N_dat (where N_dat is the number of data points after cuts) + containing uncertainty and correlation information. Example ------- @@ -102,7 +104,7 @@ def covmat_from_systematics(loaded_commondata_with_cuts, _central_values=None): >>> from validphys.commondataparser import load_commondata >>> from validphys.loader import Loader - >>> from validphys.calcutils import covmat_from_systematics + >>> from validphys.covmats import covmat_from_systematics >>> l = Loader() >>> cd = l.check_commondata("NMC") >>> cd = load_commondata(cd) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index d62331f467..dba391e6ad 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -12,6 +12,7 @@ import pandas as pd from validphys.checks import check_cuts_fromfit, check_darwin_single_process +from validphys.covmats import INTRA_DATASET_SYS_NAME from reportengine import collect @@ -104,6 +105,129 @@ def read_fit_pseudodata(fitcontext, context_index): yield pseudodata.drop("type", axis=1), tr.index, val.index +def make_replica(dataset_inputs_loaded_cd_with_cuts, seed=None): + """Function that takes in a list of :py:class:`validphys.coredata.CommonData` + objects and returns a pseudodata replica accounting for + possible correlations between systematic uncertainties. + + The function loops until positive definite pseudodata is generated for any + non-asymmetry datasets. In the case of an asymmetry dataset negative values are + permitted so the loop block executes only once. + + Parameters + --------- + dataset_inputs_loaded_cd_with_cuts: list[:py:class:`validphys.coredata.CommonData`] + List of CommonData objects which stores information about systematic errors, + their treatment and description, for each dataset. + + seed: int, None + Seed used to initialise the numpy random number generator. If ``None`` then a random seed is + allocated using the default numpy behaviour. + + Returns + ------- + pseudodata: np.array + Numpy array which is N_dat (where N_dat is the combined number of data points after cuts) + containing monte carlo samples of data centered around the data central value. + + Example + ------- + >>> from validphys.api import API + >>> pseudodata = API.make_replica( + dataset_inputs=[{"dataset":"NMC"}, {"dataset": "NMCPD"}], + use_cuts="nocuts", + theoryid=53 + ) + array([0.25721162, 0.2709698 , 0.27525357, 0.28903442, 0.3114298 , + 0.3005844 , 0.3184538 , 0.31094522, 0.30750703, 0.32673155, + 0.34843355, 0.34730928, 0.3090914 , 0.32825111, 0.3485292 , + """ + # Seed the numpy RNG with the seed. + rng = np.random.default_rng(seed=seed) + + # The inner while True loop is for ensuring a positive definite + # pseudodata replica + while True: + pseudodatas = [] + special_add = [] + special_mult = [] + mult_shifts = [] + check_positive_masks = [] + for cd in dataset_inputs_loaded_cd_with_cuts: + # copy here to avoid mutating the central values. + pseudodata = cd.central_values.to_numpy(copy=True) + + # add contribution from statistical uncertainty + pseudodata += (cd.stat_errors.to_numpy() * rng.normal(size=cd.ndata)) + + # ~~~ ADDITIVE ERRORS ~~~ + add_errors = cd.additive_errors + add_uncorr_errors = add_errors.loc[:, add_errors.columns=="UNCORR"].to_numpy() + + pseudodata += (add_uncorr_errors * rng.normal(size=add_uncorr_errors.shape)).sum(axis=1) + + # correlated within dataset + add_corr_errors = add_errors.loc[:, add_errors.columns == "CORR"].to_numpy() + pseudodata += add_corr_errors @ rng.normal(size=add_corr_errors.shape[1]) + + # append the partially shifted pseudodata + pseudodatas.append(pseudodata) + # store the additive errors with correlations between datasets for later use + special_add.append( + add_errors.loc[:, ~add_errors.columns.isin(INTRA_DATASET_SYS_NAME)] + ) + # ~~~ MULTIPLICATIVE ERRORS ~~~ + mult_errors = cd.multiplicative_errors + mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() + # convert to from percent to fraction + mult_shift = ( + 1 + mult_uncorr_errors * rng.normal(size=mult_uncorr_errors.shape) / 100 + ).prod(axis=1) + + mult_corr_errors = mult_errors.loc[:, mult_errors.columns == "CORR"].to_numpy() + mult_shift *= ( + 1 + mult_corr_errors * rng.normal(size=(1, mult_corr_errors.shape[1])) / 100 + ).prod(axis=1) + + mult_shifts.append(mult_shift) + + # store the multiplicative errors with correlations between datasets for later use + special_mult.append( + mult_errors.loc[:, ~mult_errors.columns.isin(INTRA_DATASET_SYS_NAME)] + ) + + # mask out the data we want to check are all positive + if "ASY" in cd.commondataproc: + check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) + else: + check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) + + # non-overlapping systematics are set to NaN by concat, fill with 0 instead. + special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() + special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() + + + all_pseudodata = ( + np.concatenate(pseudodatas, axis=0) + + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) + ) * ( + np.concatenate(mult_shifts, axis=0) + * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) + ) + + if np.all(all_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): + break + + return all_pseudodata + + +def indexed_make_replica(groups_index, make_replica): + """Index the make_replica pseudodata appropriately + """ + + return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) + + @check_darwin_single_process def fitted_pseudodata_internal(fit, experiments, num_fitted_replicas, t0pdfset=None, NPROC=None): """A function to obtain information about the pseudodata that went diff --git a/validphys2/src/validphys/tableloader.py b/validphys2/src/validphys/tableloader.py index 34883ad293..8d4a493fbb 100644 --- a/validphys2/src/validphys/tableloader.py +++ b/validphys2/src/validphys/tableloader.py @@ -32,6 +32,11 @@ def fixup_header(df, head_index, dtype): *oldcols.levels[head_index+1:]]) df.columns = newcols +def parse_data_cv(filename): + """Useful for reading DataFrames with just one column.""" + df = sane_load(filename, index_col=[0, 1, 2]) + return df + def parse_exp_mat(filename): """Parse a dump of a matrix like experiments_covmat.""" df = sane_load(filename, header=[0,1,2], index_col=[0,1,2]) diff --git a/validphys2/src/validphys/tests/regressions/test_mcreplica.csv b/validphys2/src/validphys/tests/regressions/test_mcreplica.csv new file mode 100644 index 0000000000..bdfab83f4b --- /dev/null +++ b/validphys2/src/validphys/tests/regressions/test_mcreplica.csv @@ -0,0 +1,267 @@ +group dataset id data +ATLAS ATLASWZRAP36PB 0 625874.0721154683 +ATLAS ATLASWZRAP36PB 1 629690.9940296302 +ATLAS ATLASWZRAP36PB 2 647804.631311913 +ATLAS ATLASWZRAP36PB 3 640294.5028097499 +ATLAS ATLASWZRAP36PB 4 666615.6925633255 +ATLAS ATLASWZRAP36PB 5 679742.6040870899 +ATLAS ATLASWZRAP36PB 6 661543.8956116233 +ATLAS ATLASWZRAP36PB 7 661401.0725677101 +ATLAS ATLASWZRAP36PB 8 694185.1736757511 +ATLAS ATLASWZRAP36PB 9 674955.0227261193 +ATLAS ATLASWZRAP36PB 10 595607.510395848 +ATLAS ATLASWZRAP36PB 11 461178.58903195267 +ATLAS ATLASWZRAP36PB 12 454625.3670724297 +ATLAS ATLASWZRAP36PB 13 481323.87751588045 +ATLAS ATLASWZRAP36PB 14 458849.11243098835 +ATLAS ATLASWZRAP36PB 15 439424.94273936 +ATLAS ATLASWZRAP36PB 16 432160.16046439036 +ATLAS ATLASWZRAP36PB 17 403846.69581751985 +ATLAS ATLASWZRAP36PB 18 396368.08282609435 +ATLAS ATLASWZRAP36PB 19 393885.7902546321 +ATLAS ATLASWZRAP36PB 20 366443.97889378696 +ATLAS ATLASWZRAP36PB 21 344324.53993439255 +ATLAS ATLASWZRAP36PB 22 135144.70842506472 +ATLAS ATLASWZRAP36PB 23 139058.67466877738 +ATLAS ATLASWZRAP36PB 24 129580.76369541005 +ATLAS ATLASWZRAP36PB 25 125247.04589283372 +ATLAS ATLASWZRAP36PB 26 119478.16024355698 +ATLAS ATLASWZRAP36PB 27 115032.04083971222 +ATLAS ATLASWZRAP36PB 28 104865.22589012324 +ATLAS ATLASWZRAP36PB 29 57082.53327858195 +ATLAS ATLASZHIGHMASS49FB 0 225.2241265581888 +ATLAS ATLASZHIGHMASS49FB 1 102.44036519725282 +ATLAS ATLASZHIGHMASS49FB 2 51.11583760792298 +ATLAS ATLASZHIGHMASS49FB 3 27.488866078195667 +ATLAS ATLASZHIGHMASS49FB 4 18.220176599654852 +ATLAS ATLASZHIGHMASS49FB 5 11.174983683757414 +ATLAS ATLASZHIGHMASS49FB 6 8.695255033748985 +ATLAS ATLASZHIGHMASS49FB 7 4.521850917275335 +ATLAS ATLASZHIGHMASS49FB 8 1.7204940792698935 +ATLAS ATLASZHIGHMASS49FB 9 0.4698742453309486 +ATLAS ATLASZHIGHMASS49FB 10 0.14129611171097067 +ATLAS ATLASZHIGHMASS49FB 11 0.027030707761265878 +ATLAS ATLASZHIGHMASS49FB 12 0.005449291740977146 +ATLAS ATLASLOMASSDY11EXT 0 13576.517089454817 +ATLAS ATLASLOMASSDY11EXT 1 23865.177412820023 +ATLAS ATLASLOMASSDY11EXT 2 14606.30537341771 +ATLAS ATLASLOMASSDY11EXT 3 6812.0492301686645 +ATLAS ATLASLOMASSDY11EXT 4 2873.966448543525 +ATLAS ATLASLOMASSDY11EXT 5 1270.81693829667 +ATLAS ATLASWZRAP11 0 575091.3781838007 +ATLAS ATLASWZRAP11 1 573831.5774693731 +ATLAS ATLASWZRAP11 2 577308.4133944701 +ATLAS ATLASWZRAP11 3 581166.6968380663 +ATLAS ATLASWZRAP11 4 579180.5469411756 +ATLAS ATLASWZRAP11 5 595392.1602877442 +ATLAS ATLASWZRAP11 6 590119.459024331 +ATLAS ATLASWZRAP11 7 598485.3027461147 +ATLAS ATLASWZRAP11 8 596928.0553084731 +ATLAS ATLASWZRAP11 9 589065.2544999232 +ATLAS ATLASWZRAP11 10 554887.8805729451 +ATLAS ATLASWZRAP11 11 432286.1471328586 +ATLAS ATLASWZRAP11 12 428885.6752850355 +ATLAS ATLASWZRAP11 13 426354.80807207135 +ATLAS ATLASWZRAP11 14 419728.3652439385 +ATLAS ATLASWZRAP11 15 411428.30053804716 +ATLAS ATLASWZRAP11 16 402163.10469116265 +ATLAS ATLASWZRAP11 17 384833.51730835374 +ATLAS ATLASWZRAP11 18 373988.9640125536 +ATLAS ATLASWZRAP11 19 360622.88887275255 +ATLAS ATLASWZRAP11 20 340702.7760869247 +ATLAS ATLASWZRAP11 21 317134.5980054034 +ATLAS ATLASWZRAP11 22 133799.96364220293 +ATLAS ATLASWZRAP11 23 133448.86365596423 +ATLAS ATLASWZRAP11 24 132637.63442726055 +ATLAS ATLASWZRAP11 25 130409.92750454393 +ATLAS ATLASWZRAP11 26 130912.15120182281 +ATLAS ATLASWZRAP11 27 127353.39305518712 +ATLAS ATLASWZRAP11 28 117895.4802432277 +ATLAS ATLASWZRAP11 29 105832.75453175441 +ATLAS ATLASWZRAP11 30 88155.7125609602 +ATLAS ATLASWZRAP11 31 67414.70246827461 +ATLAS ATLASWZRAP11 32 44841.69930108373 +ATLAS ATLASWZRAP11 33 21730.44158986774 +CMS CMSZDIFF12 0 9886.334551571706 +CMS CMSZDIFF12 1 2906.558370791581 +CMS CMSZDIFF12 2 1065.8985621057848 +CMS CMSZDIFF12 3 456.26790474310286 +CMS CMSZDIFF12 4 224.22271558328507 +CMS CMSZDIFF12 5 110.44990446224114 +CMS CMSZDIFF12 6 62.579695282891535 +CMS CMSZDIFF12 7 30.063450520529134 +CMS CMSZDIFF12 8 13.922543892672062 +CMS CMSZDIFF12 9 0.6141016049164042 +CMS CMSZDIFF12 10 9856.366953238841 +CMS CMSZDIFF12 11 2865.600713840613 +CMS CMSZDIFF12 12 1059.860323720172 +CMS CMSZDIFF12 13 445.179296515881 +CMS CMSZDIFF12 14 212.87853289770084 +CMS CMSZDIFF12 15 109.79806111700381 +CMS CMSZDIFF12 16 57.81793265111689 +CMS CMSZDIFF12 17 30.846128145509127 +CMS CMSZDIFF12 18 13.294573202916089 +CMS CMSZDIFF12 19 0.6295615951503276 +CMS CMSZDIFF12 20 9172.143310236264 +CMS CMSZDIFF12 21 2584.4159164450075 +CMS CMSZDIFF12 22 934.5819007730142 +CMS CMSZDIFF12 23 414.03599558528543 +CMS CMSZDIFF12 24 202.31230308651277 +CMS CMSZDIFF12 25 101.61697272749291 +CMS CMSZDIFF12 26 55.241506126064486 +CMS CMSZDIFF12 27 28.99689277027626 +CMS CMSZDIFF12 28 13.300620394041099 +CMS CMSZDIFF12 29 0.5485904704152681 +CMS CMSZDIFF12 30 6881.389313938887 +CMS CMSZDIFF12 31 1929.5303610925562 +CMS CMSZDIFF12 32 742.7463056241985 +CMS CMSZDIFF12 33 318.6113512739281 +CMS CMSZDIFF12 34 166.3854976523764 +CMS CMSZDIFF12 35 83.77177274421992 +CMS CMSZDIFF12 36 47.38911931541346 +CMS CMSZDIFF12 37 23.574007377898496 +CMS CMSZDIFF12 38 11.935536472481251 +CMS CMSZDIFF12 39 0.45705487477676426 +CMS CMSZDIFF12 40 3707.934954547729 +CMS CMSZDIFF12 41 1021.7408959849994 +CMS CMSZDIFF12 42 383.9123726446375 +CMS CMSZDIFF12 43 178.03122251767607 +CMS CMSZDIFF12 44 92.3591212500585 +CMS CMSZDIFF12 45 47.898485423688264 +CMS CMSZDIFF12 46 28.687924568924732 +CMS CMSZDIFF12 47 15.073356404547805 +CMS CMSZDIFF12 48 6.8587092533392475 +CMS CMSZDIFF12 49 0.2537524987802203 +CMS CMSJETS11 0 3064.8412856542986 +CMS CMSJETS11 1 1449.5091642980894 +CMS CMSJETS11 2 676.135506669856 +CMS CMSJETS11 3 328.6688802887687 +CMS CMSJETS11 4 170.7883047739323 +CMS CMSJETS11 5 89.97188258438256 +CMS CMSJETS11 6 48.1640183282502 +CMS CMSJETS11 7 26.21942900354327 +CMS CMSJETS11 8 14.019421384758264 +CMS CMSJETS11 9 7.99060694491261 +CMS CMSJETS11 10 4.620071821124748 +CMS CMSJETS11 11 2.653929526854456 +CMS CMSJETS11 12 1.5706174029329598 +CMS CMSJETS11 13 0.8776470184526391 +CMS CMSJETS11 14 0.5180721939935242 +CMS CMSJETS11 15 0.3138789290162737 +CMS CMSJETS11 16 0.18168510415487119 +CMS CMSJETS11 17 0.10788319709417281 +CMS CMSJETS11 18 0.06449279142848932 +CMS CMSJETS11 19 0.03712444722658268 +CMS CMSJETS11 20 0.021921922929691376 +CMS CMSJETS11 21 0.013033700911296818 +CMS CMSJETS11 22 0.007512607144544201 +CMS CMSJETS11 23 0.004114373157279753 +CMS CMSJETS11 24 0.0022362655927216464 +CMS CMSJETS11 25 0.001482939378122927 +CMS CMSJETS11 26 0.0007342552398618355 +CMS CMSJETS11 27 0.00031227731387260945 +CMS CMSJETS11 28 0.00020129807744728782 +CMS CMSJETS11 29 9.225641840455252e-05 +CMS CMSJETS11 30 5.8513477579783676e-05 +CMS CMSJETS11 31 1.4973792398939266e-05 +CMS CMSJETS11 32 2.257710172411508e-06 +CMS CMSJETS11 33 2938.4983182356086 +CMS CMSJETS11 34 1306.838248806242 +CMS CMSJETS11 35 597.4152766849537 +CMS CMSJETS11 36 290.0206657424005 +CMS CMSJETS11 37 152.23116359185778 +CMS CMSJETS11 38 83.52798139030051 +CMS CMSJETS11 39 42.09591785153588 +CMS CMSJETS11 40 22.8506640473896 +CMS CMSJETS11 41 13.020425758807377 +CMS CMSJETS11 42 7.290889884016562 +CMS CMSJETS11 43 4.026941804347156 +CMS CMSJETS11 44 2.3465671617499315 +CMS CMSJETS11 45 1.3340712136907575 +CMS CMSJETS11 46 0.7737929315587418 +CMS CMSJETS11 47 0.43960904171665677 +CMS CMSJETS11 48 0.25276210474175886 +CMS CMSJETS11 49 0.14717720949714796 +CMS CMSJETS11 50 0.0847412733331661 +CMS CMSJETS11 51 0.048881737479790216 +CMS CMSJETS11 52 0.028451560671024542 +CMS CMSJETS11 53 0.01599524989930021 +CMS CMSJETS11 54 0.008733853605811368 +CMS CMSJETS11 55 0.004594104744837374 +CMS CMSJETS11 56 0.002236004787141291 +CMS CMSJETS11 57 0.0014403506626033526 +CMS CMSJETS11 58 0.0007585311754938085 +CMS CMSJETS11 59 0.0003499911931369715 +CMS CMSJETS11 60 0.00011161977018081332 +CMS CMSJETS11 61 7.773134653806228e-05 +CMS CMSJETS11 62 1.0739797452865742e-05 +CMS CMSJETS11 63 2582.266754202891 +CMS CMSJETS11 64 1096.3104931367543 +CMS CMSJETS11 65 515.8803717520409 +CMS CMSJETS11 66 247.584409749495 +CMS CMSJETS11 67 125.56185712566229 +CMS CMSJETS11 68 64.59934467165961 +CMS CMSJETS11 69 33.7709164498504 +CMS CMSJETS11 70 18.28629614870014 +CMS CMSJETS11 71 9.863997071005198 +CMS CMSJETS11 72 5.208105065908411 +CMS CMSJETS11 73 2.890135232927644 +CMS CMSJETS11 74 1.6089548948524601 +CMS CMSJETS11 75 0.849452414837028 +CMS CMSJETS11 76 0.4673888181261503 +CMS CMSJETS11 77 0.2668471983983226 +CMS CMSJETS11 78 0.14592329381516939 +CMS CMSJETS11 79 0.07508557256079366 +CMS CMSJETS11 80 0.040293907781475916 +CMS CMSJETS11 81 0.02119984449211859 +CMS CMSJETS11 82 0.010995803998216362 +CMS CMSJETS11 83 0.005466812931544958 +CMS CMSJETS11 84 0.002642109565833648 +CMS CMSJETS11 85 0.0013200185870372624 +CMS CMSJETS11 86 0.0005247905933328646 +CMS CMSJETS11 87 0.00028209984999926743 +CMS CMSJETS11 88 0.0001720895024153124 +CMS CMSJETS11 89 1.1238503789800609e-05 +CMS CMSJETS11 90 2003.922630727413 +CMS CMSJETS11 91 874.731868408299 +CMS CMSJETS11 92 392.1741073514707 +CMS CMSJETS11 93 187.66789571263348 +CMS CMSJETS11 94 92.02147434259216 +CMS CMSJETS11 95 44.1730889548527 +CMS CMSJETS11 96 23.833613441467335 +CMS CMSJETS11 97 11.819012342827115 +CMS CMSJETS11 98 6.209444739322469 +CMS CMSJETS11 99 3.099195938055046 +CMS CMSJETS11 100 1.5455020113031772 +CMS CMSJETS11 101 0.8181216756637731 +CMS CMSJETS11 102 0.3978236711718021 +CMS CMSJETS11 103 0.1973724877272748 +CMS CMSJETS11 104 0.09488179243734336 +CMS CMSJETS11 105 0.04284285858513589 +CMS CMSJETS11 106 0.02013247664048886 +CMS CMSJETS11 107 0.00904569048761001 +CMS CMSJETS11 108 0.003957411563322136 +CMS CMSJETS11 109 0.0013947956627950616 +CMS CMSJETS11 110 0.0005423450935199612 +CMS CMSJETS11 111 0.00019426047276743468 +CMS CMSJETS11 112 8.641325482048829e-05 +CMS CMSJETS11 113 1.1205549962724038e-05 +CMS CMSJETS11 114 1378.9721889654777 +CMS CMSJETS11 115 586.5280099525319 +CMS CMSJETS11 116 254.46531592644695 +CMS CMSJETS11 117 117.48542213423157 +CMS CMSJETS11 118 50.164169623489386 +CMS CMSJETS11 119 23.869966504044648 +CMS CMSJETS11 120 10.595115571480735 +CMS CMSJETS11 121 4.680940497616173 +CMS CMSJETS11 122 2.026200900502425 +CMS CMSJETS11 123 0.9369729158473754 +CMS CMSJETS11 124 0.3821993863199565 +CMS CMSJETS11 125 0.14796011741945186 +CMS CMSJETS11 126 0.05808605576889366 +CMS CMSJETS11 127 0.020258461063771327 +CMS CMSJETS11 128 0.006549884219507623 +CMS CMSJETS11 129 0.0021102678286488382 +CMS CMSJETS11 130 0.00027998963463139373 +CMS CMSJETS11 131 8.597370643627551e-05 +CMS CMSJETS11 132 9.361652706292585e-06 diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py new file mode 100644 index 0000000000..627de972de --- /dev/null +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -0,0 +1,86 @@ +""" +test_pythonmakereplica.py + +Module for testing the python implementation of make replica + +""" +from copy import deepcopy + +import numpy as np +from pandas.testing import assert_frame_equal, assert_series_equal +import pytest + +from validphys.api import API +from validphys.pseudodata import make_replica +from validphys.tests.conftest import DATA +from validphys.tests.test_covmats import CORR_DATA + + +SINGLE_SYS_DATASETS = [ + {"dataset": "DYE886R"}, + {"dataset": "D0ZRAP", "cfac": ["QCD"]}, + {"dataset": "ATLAS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, + {"dataset": "CMS_SINGLETOP_TCH_R_8TEV", "cfac": ["QCD"]}, + {"dataset": "CMS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, +] + +@pytest.mark.parametrize("use_cuts", ["nocuts", "internal"]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA, SINGLE_SYS_DATASETS]) +def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): + """Load the commondata, then generate some pseudodata using make replica + Check that the following attributes of the commondata have not been + modified: central_values, commondata_table, systematics_table. + + """ + config = dict(data_config) + config["dataset_inputs"] = dataset_inputs + config["use_cuts"] = use_cuts + ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) + + # keep a copy of all dataframes/series pre make replica + pre_mkrep_cvs = [deepcopy(cd.central_values) for cd in ld_cds] + pre_mkrep_sys_tabs = [deepcopy(cd.systematics_table) for cd in ld_cds] + pre_mkrep_cd_tabs = [deepcopy(cd.commondata_table) for cd in ld_cds] + + make_replica(ld_cds) + + for post_mkrep_cd, pre_mkrep_cv in zip(ld_cds, pre_mkrep_cvs): + assert_series_equal(post_mkrep_cd.central_values, pre_mkrep_cv) + + for post_mkrep_cd, pre_mkrep_sys_tab in zip(ld_cds, pre_mkrep_sys_tabs): + assert_frame_equal(post_mkrep_cd.systematics_table, pre_mkrep_sys_tab) + + for post_mkrep_cd, pre_mkrep_cd_tab in zip(ld_cds, pre_mkrep_cd_tabs): + assert_frame_equal(post_mkrep_cd.commondata_table, pre_mkrep_cd_tab) + + +@pytest.mark.parametrize("use_cuts", ["nocuts", "internal"]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA, SINGLE_SYS_DATASETS]) +def test_pseudodata_seeding(data_config, dataset_inputs, use_cuts): + """Check that using a seed reproduces the pseudodata. Note that this also + will check that the commondata hasn't been modified since reproducing + the same commondata requires that the commondata is unchanged and that + the random numbers are generated and used identically. + + """ + seed = 123456 + config = dict(data_config) + config["dataset_inputs"] = dataset_inputs + config["use_cuts"] = use_cuts + ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) + rep_1 = make_replica(ld_cds, seed=seed) + rep_2 = make_replica(ld_cds, seed=seed) + np.testing.assert_allclose(rep_1, rep_2) + + +@pytest.mark.parametrize("use_cuts", ["nocuts", "internal"]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA, SINGLE_SYS_DATASETS]) +def test_pseudodata_has_correct_ndata(data_config, dataset_inputs, use_cuts): + """Check that we get the correct ndata when generating pseudodata""" + config = dict(data_config) + config["dataset_inputs"] = dataset_inputs + config["use_cuts"] = use_cuts + ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) + rep = make_replica(ld_cds) + ndata = np.sum([cd.ndata for cd in ld_cds]) + assert len(rep) == ndata diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index 9dd2c10333..dd671ee3ec 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -18,8 +18,9 @@ import NNPDF from validphys import results from validphys.api import API -from validphys.tableloader import (parse_exp_mat, load_perreplica_chi2_table, +from validphys.tableloader import (parse_data_cv, parse_exp_mat, load_perreplica_chi2_table, sane_load, load_fits_chi2_table) +from validphys.tests.test_covmats import CORR_DATA @@ -51,6 +52,18 @@ def f_(*args, **kwargs): return f_ return decorator + +@make_table_comp(parse_data_cv) +def test_mcreplica(data_config): + config = dict(data_config) + config["dataset_inputs"] = CORR_DATA + seed = 123456 + # Use no cuts because if filter rules change in the + # future then this test will end up failing + rep = API.indexed_make_replica(**config, seed=seed) + return rep + + @make_table_comp(parse_exp_mat) def test_expcovmat(data_config): mat = API.groups_covmat_no_table(**data_config)