diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index a7cd2a6489..0b6615de3b 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -43,7 +43,7 @@ 'validphys.theorycovariance.tests', 'validphys.replica_selector', 'validphys.closuretest', - 'validphys.mc_gen_checks', + 'validphys.mc_gen', 'validphys.theoryinfo', 'validphys.pseudodata', 'validphys.renametools', diff --git a/validphys2/src/validphys/mc_gen_checks.py b/validphys2/src/validphys/mc_gen.py similarity index 65% rename from validphys2/src/validphys/mc_gen_checks.py rename to validphys2/src/validphys/mc_gen.py index fd0d6e7bc6..397aff3ccb 100644 --- a/validphys2/src/validphys/mc_gen_checks.py +++ b/validphys2/src/validphys/mc_gen.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -mc_gen_checks.py +mc_gen.py Tools to check the pseudo-data MC generation. """ @@ -19,61 +19,66 @@ log = logging.getLogger(__name__) -def art_rep_generation(experiments, nreplica:int, experiments_index): - """Generates the nreplica pseudodata replicas for a given experiment""" + + +def art_rep_generation(groups_data, nreplica:int): + """Generates the nreplica pseudodata replicas""" RandomGenerator.InitRNG(0,0) - for exp in experiments: - #Since we are going to modify the experiments, we copy them - #(and work on the copies) to avoid all - #sorts of weirdness with other providers. We don't want this to interact - #with ExperimentSpec at all, because it could do funny things with the - #cache when calling load(). We need to copy this yet again, for each - # of the noisy replicas. - real_exp = Experiment(exp.load()) + real_data_list = [] + art_replicas_list = [] + normart_replicas_list = [] + art_data_list = [] + + for group in groups_data: + real_group = group.load() art_replicas = [] normart_replicas = [] - real_data = real_exp.get_cv() + real_data = real_group.get_cv() # producing replicas - for i in range(nreplica): - replica_exp = Experiment(real_exp) - replica_exp.MakeReplica() - artrep = replica_exp.get_cv() + for _ in range(nreplica): + replica_group = Experiment(real_group) + replica_group.MakeReplica() + artrep = replica_group.get_cv() normartrep = artrep/real_data art_replicas.append(artrep) normart_replicas.append(normartrep) art_data = np.mean(art_replicas, axis=0) + art_data_list.append(art_data) + real_data_list.append(real_data) + art_replicas_list.append(art_replicas) + normart_replicas_list.append(normart_replicas) - return real_data, art_replicas, normart_replicas, art_data + art_replicas = np.concatenate(art_replicas_list, axis=1) + normart_replicas = np.concatenate(normart_replicas_list, axis=1) + art_data = np.concatenate(art_data_list) + real_data = np.concatenate(real_data_list) + + return real_data, art_replicas, normart_replicas, art_data -def per_point_art_rep_generation(experiments, nreplica:int, experiments_index): - """Generates the nreplica pseudodata replicas for a given experiment""" + +def per_point_art_rep_generation(groups_data, nreplica:int): + """Generates the nreplica pseudodata replicas for a given group""" RandomGenerator.InitRNG(0,0) - for exp in experiments: - #Since we are going to modify the experiments, we copy them - #(and work on the copies) to avoid all - #sorts of weirdness with other providers. We don't want this to interact - #with ExperimentSpec at all, because it could do funny things with the - #cache when calling load(). We need to copy this yet again, for each - # of the noisy replicas. - real_exp = Experiment(exp.load()) + for group in groups_data: + real_group = group.load() art_replicas = [] normart_replicas = [] - real_data = real_exp.get_cv() + real_data = real_group.get_cv() # producing replicas for i in range(nreplica): - replica_exp = Experiment(real_exp) - for point in range(len(replica_exp.get_cv())): - replica_exp.MakePerPointReplica(point) - artrep = replica_exp.get_cv() + replica_group = Experiment(real_group) + for point in range(len(replica_group.get_cv())): + replica_group.MakePerPointReplica(point) + artrep = replica_group.get_cv() normartrep = artrep/real_data art_replicas.append(artrep) normart_replicas.append(normartrep) @@ -83,13 +88,13 @@ def per_point_art_rep_generation(experiments, nreplica:int, experiments_index): return real_data, art_replicas, normart_replicas, art_data @figure -def art_data_residuals(art_rep_generation, nreplica:int, color="green"): +def art_data_residuals(art_rep_generation, color="green"): #pass """ Plot the residuals distribution of pseudodata compared to experiment. """ - real_data, art_replicas, normart_replicas, art_data = art_rep_generation + real_data, _, _, art_data = art_rep_generation residuals=real_data-art_data normresiduals = residuals/real_data @@ -104,16 +109,16 @@ def art_data_residuals(art_rep_generation, nreplica:int, color="green"): return fig @figure -def per_point_art_data_residuals(per_point_art_rep_generation, nreplica:int): - return art_data_residuals(per_point_art_rep_generation, nreplica, color="orange") +def per_point_art_data_residuals(per_point_art_rep_generation): + return art_data_residuals(per_point_art_rep_generation, color="orange") @figure -def art_data_distribution(art_rep_generation, nreplica:int, title='Artificial Data Distribution', color="green"): +def art_data_distribution(art_rep_generation, title='Artificial Data Distribution', color="green"): """ Plot of the distribution of pseudodata. """ - real_data, art_replicas, normart_replicas, art_data = art_rep_generation + real_data, _, _, art_data = art_rep_generation normart_data = art_data/real_data fig, ax = plt.subplots() @@ -127,16 +132,16 @@ def art_data_distribution(art_rep_generation, nreplica:int, title='Artificial Da return fig @figure -def per_point_art_data_distribution(per_point_art_rep_generation, nreplica:int): +def per_point_art_data_distribution(art_data_distribution,per_point_art_rep_generation, nreplica:int): return art_data_distribution(per_point_art_rep_generation, nreplica, title='Uncorrelated Artificial Data Distribution', color="orange") @figure -def art_data_moments(art_rep_generation, nreplica:int, color="green"): +def art_data_moments(art_rep_generation, color="green"): """ Returns the moments of the distributions per data point, as a histogram. """ - real_data, art_replicas, normart_replicas, art_data = art_rep_generation + real_data, _, normart_replicas, art_data = art_rep_generation artrep_array = np.asarray(normart_replicas) normart_data = art_data/real_data @@ -147,7 +152,7 @@ def art_data_moments(art_rep_generation, nreplica:int, color="green"): for momno, ax in zip(range(1,4), axes.flatten()): # Calculate moments moms = [] - for i, datapoint, normartdatapoint in zip(range(len(artrep_array.T)), artrep_array.T, normart_data): + for i, datapoint in zip(range(len(artrep_array.T)), artrep_array.T): moment = mom(datapoint, moment=momno) moms.append(moment) ax.hist(moms, bins=50, histtype='step', stacked=True, fill=False, color=color) @@ -167,7 +172,7 @@ def art_data_comparison(art_rep_generation, nreplica:int): """ Plots per datapoint of the distribution of replica values. """ - real_data, art_replicas, normart_replicas, art_data = art_rep_generation + real_data, _, normart_replicas, art_data = art_rep_generation artrep_array = np.asarray(normart_replicas) normart_data = art_data/real_data @@ -193,25 +198,25 @@ def art_data_comparison(art_rep_generation, nreplica:int): @figure -def one_art_data_residuals(art_rep_generation, nreplica:int, experiments): +def one_art_data_residuals(nreplica:int, groups_data): #pass """ Residuals plot for the first datapoint. """ RandomGenerator.InitRNG(0,0) - for exp in experiments: + for group in groups_data: - real_exp = Experiment(exp.load()) - real_data = real_exp.get_cv() + real_group = group.load() + real_data = real_group.get_cv() one_art_data = np.zeros(nreplica) one_data_index=0 #producing replicas for i in range(nreplica): - replica_exp = Experiment(real_exp) - replica_exp.MakeReplica() - one_art_data[i]=replica_exp.get_cv()[one_data_index] + replica_group = Experiment(real_group) + replica_group.MakeReplica() + one_art_data[i]=replica_group.get_cv()[one_data_index] fig, ax = plt.subplots() @@ -226,7 +231,7 @@ def one_art_data_residuals(art_rep_generation, nreplica:int, experiments): return fig @figure -def plot_deviation_from_mean(art_rep_generation, per_point_art_rep_generation, nreplica:int, experiments): +def plot_deviation_from_mean(art_rep_generation, per_point_art_rep_generation): real_data, art_replicas, normart_replicas, art_data = art_rep_generation ppreal_data, ppart_replicas, ppnormart_replicas, ppart_data = per_point_art_rep_generation @@ -246,15 +251,15 @@ def plot_deviation_from_mean(art_rep_generation, per_point_art_rep_generation, n return fig @table -def art_data_mean_table(art_rep_generation, nreplica:int, experiments): +def art_data_mean_table(art_rep_generation, groups_data): """Generate table for artdata mean values """ real_data, art_replicas, normart_replicas, art_data = art_rep_generation #residuals=real_data-art_data data=[] - for experiment in experiments: - for dataset in experiment.datasets: + for group in groups_data: + for dataset in group.datasets: ds = dataset.load() Ndata = ds.GetNData() for i in range(Ndata): @@ -264,4 +269,3 @@ def art_data_mean_table(art_rep_generation, nreplica:int, experiments): df = pd.DataFrame(data,columns=["DataSet","ArtData","ExpData","abs(residual)"]) return df - diff --git a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv new file mode 100644 index 0000000000..17461162f5 --- /dev/null +++ b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv @@ -0,0 +1,267 @@ + rep0 +0 605004.0229655241 +1 602440.6042534113 +2 620858.2593790491 +3 613230.2445669749 +4 639952.7905205782 +5 655949.8790361139 +6 636850.6054514458 +7 628994.3543439173 +8 657301.323486842 +9 627892.0285999136 +10 598536.2932218027 +11 450048.5200059017 +12 431216.7505910402 +13 447044.9288255663 +14 440004.6820123964 +15 429728.93818449834 +16 421295.20118263003 +17 384454.24090431526 +18 370980.411169651 +19 382612.0148703672 +20 354678.0131086338 +21 334603.4158950332 +22 128562.17973857898 +23 127342.18412537871 +24 124185.70091052185 +25 117188.71279217266 +26 110308.86504021735 +27 101783.07229286227 +28 84984.6415717532 +29 50391.89736764995 +30 217.31574309245264 +31 99.256686892602 +32 50.70196796309883 +33 28.166627969438913 +34 18.750385343394814 +35 10.637986338057157 +36 8.458165998514103 +37 5.161374135077057 +38 1.7440116151145275 +39 0.5166681241057582 +40 0.14344058134523924 +41 0.024785429275386427 +42 0.0025163509905126555 +43 11667.782124893894 +44 21223.526886450283 +45 13462.35543683801 +46 6138.873985151094 +47 2708.1167755085808 +48 1226.9779856735504 +49 575818.4164028777 +50 574257.0910354555 +51 580128.0112796086 +52 585059.2199059085 +53 584763.314801447 +54 596571.1334267028 +55 596936.3727577593 +56 600384.1891912837 +57 607933.3346544248 +58 588246.38141835 +59 556859.2623407103 +60 436504.26730428456 +61 432303.05461665214 +62 429878.81481606193 +63 423512.7804381009 +64 414307.7885251502 +65 405518.16477606195 +66 388224.2735100763 +67 375485.63061635545 +68 363704.71285150666 +69 342011.205090951 +70 319474.23506993934 +71 134206.51216829068 +72 133256.30384223114 +73 133281.55486784823 +74 132054.46387549827 +75 130885.56238010839 +76 128780.9376893244 +77 118628.79847887601 +78 106435.44676191603 +79 89055.8013789791 +80 68102.79690334377 +81 45021.34978934775 +82 21975.39579667182 +83 9930.95091649565 +84 2933.8462314989843 +85 1086.2279347212962 +86 465.0529875023162 +87 226.30802040953344 +88 111.79690230475649 +89 62.17381426664207 +90 30.845472233433497 +91 14.362338415025647 +92 0.613668421348077 +93 9932.826070454066 +94 2884.6121408528056 +95 1067.4232418905062 +96 450.52936603855994 +97 214.9415800499354 +98 111.97977024219476 +99 59.02227114568631 +100 30.124745159192784 +101 14.001335018551327 +102 0.6432016353212359 +103 9291.864652700102 +104 2612.102669987266 +105 952.876335785387 +106 422.62773612597147 +107 202.29727198956087 +108 104.80403433709903 +109 55.52211814833202 +110 29.47827303942286 +111 14.10284210742019 +112 0.5484354153200658 +113 6988.302774545696 +114 1953.070125553211 +115 720.4718120407176 +116 326.1560157083265 +117 163.06443813461684 +118 86.09929672384352 +119 46.6760518324352 +120 23.70075105915677 +121 11.811860841607936 +122 0.46671218710115586 +123 3773.1302184015512 +124 1038.010867040705 +125 389.0162743842787 +126 180.38234398606457 +127 92.85633549871146 +128 48.04747813864363 +129 29.151521424980952 +130 14.66798047732213 +131 6.972923144523964 +132 0.27501819107281494 +133 2685.283395705168 +134 1255.614216863319 +135 623.4123408118692 +136 300.9025858506389 +137 154.9496016877635 +138 85.6238960317311 +139 45.697395051818916 +140 24.643819750619578 +141 14.154083386403244 +142 8.083627826692325 +143 4.615053327117134 +144 2.6930698606373755 +145 1.6039820756213605 +146 0.9190543906329204 +147 0.5420879986612372 +148 0.3238060211053549 +149 0.19164359765485747 +150 0.11454603590748376 +151 0.06766963214283701 +152 0.039295554906154065 +153 0.023717740337089382 +154 0.013766586907955021 +155 0.007925087637831662 +156 0.004510292638598202 +157 0.0025257234625036358 +158 0.0015742723776101173 +159 0.0009174769620039548 +160 0.00036071719540926286 +161 0.00017939172781702523 +162 0.00010719970920129199 +163 5.10754171638108e-05 +164 9.494161579905887e-06 +165 4.097867641772405e-06 +166 2496.543317341297 +167 1155.2596235411136 +168 526.9753542249528 +169 273.3672352496259 +170 144.09730724029419 +171 73.8283798920788 +172 40.15431825945132 +173 22.253841536320582 +174 12.876862009947805 +175 7.147197673282873 +176 4.1000363261395325 +177 2.3541540270520005 +178 1.3730432793225176 +179 0.766449205276789 +180 0.4316549850474859 +181 0.2570742612596235 +182 0.15083018094262035 +183 0.08778978138524669 +184 0.05001032462453892 +185 0.029430375664826213 +186 0.01611415361553344 +187 0.008973177721778117 +188 0.004747317124419756 +189 0.0024668103139778887 +190 0.0015283063419005878 +191 0.0007787419053764184 +192 0.0003113213196017707 +193 0.00018410837384159157 +194 9.891363626568309e-05 +195 1.3627519290893994e-05 +196 2049.595793988463 +197 937.9099866627081 +198 440.85996141143124 +199 222.39625542891935 +200 112.54650664349728 +201 58.05800257833228 +202 32.120045403096555 +203 16.32868970476539 +204 9.173988069096177 +205 5.183254049935798 +206 2.8208283943003707 +207 1.6101568970220699 +208 0.8518829065147376 +209 0.484567635991665 +210 0.2695394668785933 +211 0.14428600385730475 +212 0.07791175268089195 +213 0.04329623461829344 +214 0.02197485278250843 +215 0.011369326958557987 +216 0.0056973672800596205 +217 0.002712782984102383 +218 0.0013469711694377787 +219 0.0006248060260332912 +220 0.0002558575769424933 +221 0.00016316339858413995 +222 8.002430545648022e-06 +223 1575.5050472067082 +224 728.6008273425452 +225 321.69440777243614 +226 151.0771234502 +227 75.52867010203853 +228 38.982186662924384 +229 20.356929633419362 +230 9.39420524561827 +231 5.188908325330907 +232 2.6002888128368964 +233 1.3368274693702318 +234 0.6800019692538632 +235 0.34715311449716724 +236 0.17893906702328258 +237 0.0794910332147346 +238 0.03442652404659983 +239 0.016486812605305875 +240 0.007590628806292284 +241 0.003170146326010336 +242 0.0012235655803027057 +243 0.000430484039887203 +244 0.00012033484918649306 +245 3.012808100117648e-05 +246 6.555863621871152e-06 +247 957.1097986160261 +248 406.8294452550777 +249 190.23178545891966 +250 89.90518748652033 +251 39.2527330848522 +252 17.795728828437618 +253 8.172773534716532 +254 3.258833345760595 +255 1.5580913277549024 +256 0.6302044867930973 +257 0.2604667628574771 +258 0.10035630960385046 +259 0.04013064955098406 +260 0.011580736083436731 +261 0.0043259887879153825 +262 0.0012483709943641582 +263 0.0002272026342898553 +264 4.60377440249738e-05 +265 5.936781013576351e-06 diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index dd671ee3ec..2acaf926da 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -18,6 +18,7 @@ import NNPDF from validphys import results from validphys.api import API +from validphys.tests.test_covmats import CORR_DATA from validphys.tableloader import (parse_data_cv, parse_exp_mat, load_perreplica_chi2_table, sane_load, load_fits_chi2_table) from validphys.tests.test_covmats import CORR_DATA @@ -125,3 +126,12 @@ def test_datasetchi2(data_singleexp_witht0_config): exps = API.groups_data(**data_singleexp_witht0_config) chi2s = API.groups_datasets_chi2_data(**data_singleexp_witht0_config) return results.fits_datasets_chi2_table(['test'], [exps], [chi2s]) + +@make_table_comp(sane_load) +def test_art_rep_generation(data_config): + config = dict(data_config) + config["dataset_inputs"] = CORR_DATA + config["fitting"] = {"seed": 123456} + config["nreplica"] = 1 + _, art_replicas, _,_ = API.art_rep_generation(**config) + return pd.DataFrame(art_replicas.T, columns=['rep0'])