From dee1095438e2d2be596f522609c62cd229b6ae17 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 16:52:17 +0100 Subject: [PATCH 1/8] remove ambiguity with MC PDFs with respect to hessian PDFs --- validphys2/src/validphys/core.py | 41 ++++++++------------- validphys2/src/validphys/lhapdfset.py | 15 ++------ validphys2/src/validphys/tests/test_core.py | 3 +- 3 files changed, 19 insertions(+), 40 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 2988985722..bd0a0f11e2 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -229,10 +229,8 @@ def legacy_load(self): @property def grid_values_index(self): """A range object describing which members are selected in a - ``pdf.load().grid_values`` operation. This is ``range(1, - len(pdf))`` for Monte Carlo sets, because replica 0 is not selected - and ``range(0, len(pdf))`` for hessian sets. - + ``pdf.load().grid_values`` operation. + Equal to range(0, len(pdf)) Returns ------- @@ -243,12 +241,7 @@ def grid_values_index(self): ----- The range object can be used efficiently as a Pandas index. """ - if self.error_type == "replicas": - return range(1, len(self)) - elif self.error_type in ("hessian", "symmhessian"): - return range(0, len(self)) - else: - raise RuntimeError(f"Unknown error type: {self.stats_class}") + return range(0, len(self)) def get_members(self): """Return the number of members selected in ``pdf.load().grid_values`` @@ -760,18 +753,27 @@ def load(self): def __str__(self): return str(self.path) -#TODO: Decide if we want methods or properties class Stats: + """The stats class holds the results of computations that involve PDFs + and knows how to compute statistical estimators for each kind of PDF. + The first index is always the central value. + + + Parameters + ---------- + data: np.array + Result of the computation, should always be N_pdf * N_bins + """ def __init__(self, data): """`data `should be N_pdf*N_bins""" self.data = np.atleast_2d(data) def central_value(self): - raise NotImplementedError() + return self.data[0] def error_members(self): - raise NotImplementedError() + return self.data[1:] def std_error(self): raise NotImplementedError() @@ -798,10 +800,6 @@ def errorbarstd(self): class MCStats(Stats): """Result obtained from a Monte Carlo sample""" - - def central_value(self): - return np.mean(self.data, axis=0) - def std_error(self): # ddof == 1 to match libNNPDF behaviour return np.std(self.data, ddof=1, axis=0) @@ -819,9 +817,6 @@ def errorbar68(self): def sample_values(self, size): return np.random.choice(self, size=size) - def error_members(self): - return self.data - class SymmHessianStats(Stats): """Compute stats in the 'symetric' hessian format: The first index (0) @@ -833,15 +828,9 @@ def __init__(self, data, rescale_factor=1): super().__init__(data) self.rescale_factor = rescale_factor - def central_value(self): - return self.data[0] - def errorbar68(self): return self.errorbarstd() - def error_members(self): - return self.data[1:] - def std_error(self): data = self.data diffsq = (data[0] - data[1:])**2 diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py index 6b624d770e..e45ca56acb 100644 --- a/validphys2/src/validphys/lhapdfset.py +++ b/validphys2/src/validphys/lhapdfset.py @@ -7,15 +7,14 @@ The ``.members`` and ``.central_member`` of the ``LHAPDFSet`` are LHAPDF objects (the typical output from ``mkPDFs``) and can be used normally. - For MC PDFs the ``central_member`` is the average of all replicas (members 1-100) - while for Hessian PDFfs the ``central_member`` is also ``members[0]`` + The ``central_member`` PDF is also ``member[0]`` Examples -------- >>> from validphys.lhapdfset import LHAPDFSet >>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas") >>> len(pdf.members) - 100 + 101 >>> pdf.central_member.alphasQ(91.19) 0.11800 >>> pdf.members[0].xfxQ2(0.5, 15625) @@ -59,11 +58,6 @@ def __init__(self, name, error_type): self._lhapdf_set = lhapdf.mkPDFs(name) self._flavors = None - @property - def is_monte_carlo(self): - """Check whether the error type is MC""" - return self._error_type == "replicas" - @property def is_t0(self): """Check whether we are in t0 mode""" @@ -78,13 +72,10 @@ def n_members(self): def members(self): """Return the members of the set, this depends on the error type: t0: returns only member 0 - MC: skip member 0 - Hessian: return all + MC & Hessian: return all """ if self.is_t0: return self._lhapdf_set[0:1] - if self.is_monte_carlo: - return self._lhapdf_set[1:] return self._lhapdf_set @property diff --git a/validphys2/src/validphys/tests/test_core.py b/validphys2/src/validphys/tests/test_core.py index 5dfefc705d..17ca8b3c4c 100644 --- a/validphys2/src/validphys/tests/test_core.py +++ b/validphys2/src/validphys/tests/test_core.py @@ -16,9 +16,8 @@ def test_pdf(pdf_name): assert pdf.isinstalled error_type = pdf.error_type if error_type == "replicas": - assert pdf.get_members() == (len(pdf)-1) assert pdf.error_conf_level is None else: - assert pdf.get_members() == len(pdf) assert isinstance(pdf.error_conf_level, (int, float)) + assert pdf.get_members() == len(pdf) assert pdf.name == pdf._plotname == pdf_name == str(pdf) From acff142768fe9aea727dbf5071c9504d01211721 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 17:29:01 +0100 Subject: [PATCH 2/8] use stats for the theory predictions --- validphys2/src/validphys/results.py | 57 ++++++++++--------- .../src/validphys/tests/test_pyfkdata.py | 8 +-- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 0848ea6c1b..792b2878fc 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -36,7 +36,6 @@ ) from validphys.convolution import ( predictions, - central_predictions, PredictionsRequireCutsError, ) @@ -70,6 +69,12 @@ def __len__(self): class StatsResult(Result): + """Wrapper for a Results-holding Stats-class + It uses properties instead of methods. + Also while internally the `Stats` class holds the data + in a N_pdf x N_bins, this class treats data instead + as N_bins x N_pdf + """ def __init__(self, stats): self.stats = stats @@ -81,6 +86,22 @@ def central_value(self): def std_error(self): return self.stats.std_error() + @property + def data(self): + return self.stats.data.T + + @property + def _rawdata(self): + """Legacy""" + return self.data + + @property + def error_members(self): + return self.stats.error_members().T + + def __len__(self): + return len(self.central_value) + class DataResult(NNPDFDataResult): def __init__(self, dataobj, covmat, sqrtcovmat, central_value=None): @@ -106,27 +127,12 @@ def sqrtcovmat(self): return self._sqrtcovmat -class ThPredictionsResult(NNPDFDataResult): - """Class holding theory prediction - For legacy purposes it still accepts libNNPDF datatypes, but prefers python-pure stuff - """ - def __init__(self, dataobj, stats_class, label=None, central_value=None): +class ThPredictionsResult(StatsResult): + """Class holding theory predictions""" + def __init__(self, stats_data, stats_class, label=None): self.stats_class = stats_class self.label = label - # Ducktype the input into numpy arrays - try: - self._rawdata = dataobj.to_numpy() - # If the numpy conversion worked then we don't have a libNNPDF in our hands - stats = stats_class(self._rawdata.T) - self._std_error = stats.std_error() - except AttributeError: - self._std_error = dataobj.get_error() - self._rawdata = dataobj.get_data() - super().__init__(dataobj, central_value=central_value) - - @property - def std_error(self): - return self._std_error + super().__init__(stats_data) @staticmethod def make_label(pdf, dataset): @@ -152,20 +158,15 @@ def from_convolution(cls, pdf, dataset): datasets = (dataset,) try: - all_preds = [] - all_centrals = [] - for d in datasets: - all_preds.append(predictions(d, pdf)) - all_centrals.append(central_predictions(d, pdf)) + th_predictions = pd.concat([predictions(d, pdf) for d in datasets]) + stats_th = pdf.stats_class(th_predictions.T) except PredictionsRequireCutsError as e: raise PredictionsRequireCutsError("Predictions from FKTables always require cuts, " "if you want to use the fktable intrinsic cuts set `use_cuts: 'internal'`") from e - th_predictions = pd.concat(all_preds) - central_values = pd.concat(all_centrals) label = cls.make_label(pdf, dataset) - return cls(th_predictions, pdf.stats_class, label, central_value=central_values) + return cls(stats_th, pdf.stats_class, label) class PositivityResult(StatsResult): diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index db254cf844..dae4c2f27c 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -47,6 +47,8 @@ def test_cuts(): def test_predictions(): + """Check that the different kind of predictions can be performed + and that the results stay unchanged after they are processed by ThPredictionsResult""" l = Loader() pdf = l.check_pdf(PDF) dss = [ @@ -63,10 +65,8 @@ def test_predictions(): ] for ds in dss: preds = predictions(ds, pdf) - cppres = ThPredictionsResult.from_convolution(pdf, ds) - # Change the atol and rtol from 1e-8 and 1e-7 since DYE906R - # fails with the default setting - assert_allclose(preds.values, cppres._rawdata, atol=1e-8, rtol=1e-3) + nnpdf_res = ThPredictionsResult.from_convolution(pdf, ds) + assert_allclose(preds, nnpdf_res._rawdata) def test_extended_predictions(): l = Loader() From 488560648e59c8fae28d7d72f5073bcfc4fb2c36 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 18:10:06 +0100 Subject: [PATCH 3/8] remove _rawdata and reference instead the explicit information being used data / error_members --- validphys2/src/validphys/calcutils.py | 4 ++-- validphys2/src/validphys/core.py | 4 ++-- validphys2/src/validphys/correlations.py | 6 +++--- validphys2/src/validphys/covmats.py | 2 +- validphys2/src/validphys/results.py | 11 +++-------- validphys2/src/validphys/tests/test_pyfkdata.py | 2 +- 6 files changed, 12 insertions(+), 17 deletions(-) diff --git a/validphys2/src/validphys/calcutils.py b/validphys2/src/validphys/calcutils.py index af4e721735..730a6a0818 100644 --- a/validphys2/src/validphys/calcutils.py +++ b/validphys2/src/validphys/calcutils.py @@ -70,7 +70,7 @@ def all_chi2(results): """Return the chi² for all elements in the result. Note that the interpretation of the result will depend on the PDF error type""" data_result, th_result = results - diffs = th_result._rawdata - data_result.central_value[:,np.newaxis] + diffs = th_result.data - data_result.central_value[:,np.newaxis] return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs) def central_chi2(results): @@ -85,7 +85,7 @@ def all_chi2_theory(results, totcov): """Like all_chi2 but here the chi² are calculated using a covariance matrix that is the sum of the experimental covmat and the theory covmat.""" data_result, th_result = results - diffs = th_result._rawdata - data_result.central_value[:,np.newaxis] + diffs = th_result.data - data_result.central_value[:,np.newaxis] total_covmat = np.array(totcov) return calc_chi2(sqrtcov=la.cholesky(total_covmat, lower=True), diffs=diffs) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index bd0a0f11e2..275b5d877d 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -802,10 +802,10 @@ class MCStats(Stats): """Result obtained from a Monte Carlo sample""" def std_error(self): # ddof == 1 to match libNNPDF behaviour - return np.std(self.data, ddof=1, axis=0) + return np.std(self.error_members(), ddof=1, axis=0) def moment(self, order): - return np.mean(np.power(self.data-self.central_value(),order), axis=0) + return np.mean(np.power(self.error_members()-self.central_value(),order), axis=0) def errorbar68(self): #Use nanpercentile here because we can have e.g. 0/0==nan normalization diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py index 0691a71f96..20f6aae386 100644 --- a/validphys2/src/validphys/correlations.py +++ b/validphys2/src/validphys/correlations.py @@ -60,13 +60,13 @@ def _basic_obs_obs_correlation(obsarr1, obsarr2): return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0)) -#TODO: Implement for other error types. Do not use the _rawdata. +#TODO: Implement for other error types. @check_pdf_is_montecarlo def obs_pdf_correlations(pdf, results, xplotting_grid): """Return the correlations between each point in a dataset and the PDF values on a grid of (x,f) points in a format similar to `xplotting_grid`.""" _ , th = results - corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th._rawdata) + corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th.error_members) return xplotting_grid._replace(grid_values=corrs) @@ -77,4 +77,4 @@ def obs_pdf_correlations(pdf, results, xplotting_grid): def obs_obs_correlations(pdf, corrpair_results): """Return the theoretical correlation matrix between a pair of observables.""" (_,th1), (_,th2) = corrpair_results - return _basic_obs_obs_correlation(th1._rawdata, th2._rawdata) + return _basic_obs_obs_correlation(th1.error_members, th2.error_members) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index bd09c15d5b..1d4c73c424 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered): True """ th = ThPredictionsResult.from_convolution(pdf, dataset) - pdf_cov = np.cov(th._rawdata, rowvar=True) + pdf_cov = np.cov(th.error_members, rowvar=True) return pdf_cov + covmat_t0_considered diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 792b2878fc..0b609273c5 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -90,11 +90,6 @@ def std_error(self): def data(self): return self.stats.data.T - @property - def _rawdata(self): - """Legacy""" - return self.data - @property def error_members(self): return self.stats.error_members().T @@ -260,7 +255,7 @@ def group_result_table_no_table(groups_results, groups_index): ): replicas = ( ("rep_%05d" % (i + 1), th_rep) - for i, th_rep in enumerate(th._rawdata[index, :]) + for i, th_rep in enumerate(th.error_members[index, :]) ) result_records.append( @@ -615,7 +610,7 @@ def dataset_inputs_bootstrap_phi_data(dataset_inputs_results, bootstrap_samples= For more information on how phi is calculated see `phi_data` """ dt, th = dataset_inputs_results - diff = np.array(th._rawdata - dt.central_value[:, np.newaxis]) + diff = np.array(th.error_members - dt.central_value[:, np.newaxis]) phi_resample = bootstrap_values( diff, bootstrap_samples, @@ -635,7 +630,7 @@ def dataset_inputs_bootstrap_chi2_central( a different value can be specified in the runcard. """ dt, th = dataset_inputs_results - diff = np.array(th._rawdata - dt.central_value[:, np.newaxis]) + diff = np.array(th.error_members - dt.central_value[:, np.newaxis]) cchi2 = lambda x, y: calc_chi2(y, x.mean(axis=1)) chi2_central_resample = bootstrap_values( diff, diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index dae4c2f27c..c4100fcfbb 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -66,7 +66,7 @@ def test_predictions(): for ds in dss: preds = predictions(ds, pdf) nnpdf_res = ThPredictionsResult.from_convolution(pdf, ds) - assert_allclose(preds, nnpdf_res._rawdata) + assert_allclose(preds, nnpdf_res.data) def test_extended_predictions(): l = Loader() From 16ea3a86fdf5aa1ab129918b2eb8e57aec2077b9 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 18:10:21 +0100 Subject: [PATCH 4/8] same as previous commit for closure test, always MC right? --- .../src/validphys/closuretest/closure_results.py | 6 +++--- validphys2/src/validphys/closuretest/multiclosure.py | 10 +++++----- .../validphys/closuretest/multiclosure_pseudodata.py | 2 +- validphys2/src/validphys/tests/test_closuretest.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py index 9f30cf15ee..cfda6ae555 100644 --- a/validphys2/src/validphys/closuretest/closure_results.py +++ b/validphys2/src/validphys/closuretest/closure_results.py @@ -122,7 +122,7 @@ def bootstrap_bias_experiment( """ dt_ct, th_ct = dataset_inputs_results ((_, th_ul),) = underlying_dataset_inputs_results - th_ct_boot_cv = bootstrap_values(th_ct._rawdata, bootstrap_samples) + th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples) boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis] boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct) return boot_bias @@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata): """ dt, th = results - diff = th.central_value[:, np.newaxis] - th._rawdata + diff = th.central_value[:, np.newaxis] - th.error_members var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean() return VarianceData(var_unnorm, len(th)) @@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500) normalised to the number of data in the experiment. """ dt_ct, th_ct = dataset_inputs_results - diff = th_ct.central_value[:, np.newaxis] - th_ct._rawdata + diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members var_unnorm_boot = bootstrap_values( diff, bootstrap_samples, diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index c784c8996a..9e5f8d5d7a 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -130,7 +130,7 @@ def fits_dataset_bias_variance( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis @@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader): """ closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader - replicas = np.asarray([th._rawdata for th in closures_th]) + replicas = np.asarray([th.error_members for th in closures_th]) centrals = np.mean(replicas, axis=-1) underlying = law_th.central_value @@ -297,7 +297,7 @@ class BootstrappedTheoryResult: """ def __init__(self, data): - self._rawdata = data + self.data = data self.central_value = data.mean(axis=1) @@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits( # construct proxy fits theory predictions for fit_th in fit_boot_th: rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats) - boot_ths.append(BootstrappedTheoryResult(fit_th._rawdata[:, rep_boot_index])) + boot_ths.append(BootstrappedTheoryResult(fit_th.error_members[:, rep_boot_index])) return (boot_ths, *input_tuple) @@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis diff --git a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py index ff32d86fd1..0fc8e0b360 100644 --- a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py +++ b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py @@ -126,7 +126,7 @@ def experiments_closure_pseudodata_estimators_table( fits_erep_delta_chi2 = [] for i_fit, fit_th in enumerate(closures_th): # some of these could be done outside of loop, but easier to do here. - th_replicas = fit_th._rawdata + th_replicas = fit_th.error_members th_central = np.mean(th_replicas, axis=-1) dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy() dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy() diff --git a/validphys2/src/validphys/tests/test_closuretest.py b/validphys2/src/validphys/tests/test_closuretest.py index a55eb4063a..181d05e550 100644 --- a/validphys2/src/validphys/tests/test_closuretest.py +++ b/validphys2/src/validphys/tests/test_closuretest.py @@ -13,7 +13,7 @@ class TestResult: """class for testing base level estimators which expect a results object""" def __init__(self, central_value, rawdata=None): self.central_value = central_value - self._rawdata = rawdata + self.data = rawdata self.ndata = len(central_value) self.sqrtcovmat = np.identity(self.ndata) From 4c8a9c2edc5cef8ee80c1764bd2979e2238abc95 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 18:34:19 +0100 Subject: [PATCH 5/8] make effective exponents do the stats correctly --- validphys2/src/validphys/core.py | 2 +- validphys2/src/validphys/eff_exponents.py | 11 ++++------- validphys2/src/validphys/tests/test_effexponents.py | 10 ++++++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 275b5d877d..2fcc328060 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -762,7 +762,7 @@ class Stats: Parameters ---------- data: np.array - Result of the computation, should always be N_pdf * N_bins + Result of the computation, should always be N_pdf * N_bins * ... """ def __init__(self, data): diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index a7b07c2ecc..32bd57abfb 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -348,17 +348,14 @@ def next_effective_exponents_table( eff_exp_data = [] - alphagrid = alpha_effs.grid_values - betagrid = beta_effs.grid_values - - alphastats = pdf.stats_class(alphagrid) - betastats = pdf.stats_class(betagrid) + alphastats = pdf.stats_class(alpha_effs.grid_values) + betastats = pdf.stats_class(beta_effs.grid_values) with warnings.catch_warnings(): warnings.simplefilter('ignore', RuntimeWarning) - alpha_cv = np.nanmean(alphagrid, axis=0) - beta_cv = np.nanmean(betagrid, axis=0) + alpha_cv = np.nanmean(alphastats.error_members(), axis=0) + beta_cv = np.nanmean(betastats.error_members(), axis=0) #tuple of low and high values repectively alpha68 = alphastats.errorbar68() beta68 = betastats.errorbar68() diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py index 586786c459..e5d5c4ba89 100644 --- a/validphys2/src/validphys/tests/test_effexponents.py +++ b/validphys2/src/validphys/tests/test_effexponents.py @@ -1,4 +1,5 @@ import pytest +import numpy as np from reportengine.compat import yaml @@ -43,5 +44,10 @@ def test_next_runcard(): if seed in runcard: runcard.pop(seed) - # Check that the actual ite2 runcard matches what vp thinks it should be - assert predicted_ite2_runcard == ite2_runcard + # We are only interested in the basis so check that that is the same + # instead of testing the whole runcard + predicted_basis = predicted_ite2_runcard["fitting"]["basis"] + reference_basis = ite2_runcard["fitting"]["basis"] + + for pf, rf in zip(predicted_basis, reference_basis): + pf == rf From d04f3d70ec45703f7314d6aa1fa8f699709114f5 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 18:51:50 +0100 Subject: [PATCH 6/8] fix closure tests --- validphys2/src/validphys/closuretest/closure_results.py | 6 +++--- validphys2/src/validphys/closuretest/multiclosure.py | 8 ++++---- .../src/validphys/closuretest/multiclosure_pseudodata.py | 2 +- validphys2/src/validphys/correlations.py | 4 ++-- validphys2/src/validphys/tests/test_closuretest.py | 4 ++++ 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py index cfda6ae555..f62dbb0d71 100644 --- a/validphys2/src/validphys/closuretest/closure_results.py +++ b/validphys2/src/validphys/closuretest/closure_results.py @@ -122,7 +122,7 @@ def bootstrap_bias_experiment( """ dt_ct, th_ct = dataset_inputs_results ((_, th_ul),) = underlying_dataset_inputs_results - th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples) + th_ct_boot_cv = bootstrap_values(th_ct.data, bootstrap_samples) boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis] boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct) return boot_bias @@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata): """ dt, th = results - diff = th.central_value[:, np.newaxis] - th.error_members + diff = th.central_value[:, np.newaxis] - th.data var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean() return VarianceData(var_unnorm, len(th)) @@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500) normalised to the number of data in the experiment. """ dt_ct, th_ct = dataset_inputs_results - diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members + diff = th_ct.central_value[:, np.newaxis] - th_ct.data var_unnorm_boot = bootstrap_values( diff, bootstrap_samples, diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index 9e5f8d5d7a..7deecb749e 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -130,7 +130,7 @@ def fits_dataset_bias_variance( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis @@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader): """ closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader - replicas = np.asarray([th.error_members for th in closures_th]) + replicas = np.asarray([th.data for th in closures_th]) centrals = np.mean(replicas, axis=-1) underlying = law_th.central_value @@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits( # construct proxy fits theory predictions for fit_th in fit_boot_th: rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats) - boot_ths.append(BootstrappedTheoryResult(fit_th.error_members[:, rep_boot_index])) + boot_ths.append(BootstrappedTheoryResult(fit_th.data[:, rep_boot_index])) return (boot_ths, *input_tuple) @@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis diff --git a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py index 0fc8e0b360..14269fd11f 100644 --- a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py +++ b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py @@ -126,7 +126,7 @@ def experiments_closure_pseudodata_estimators_table( fits_erep_delta_chi2 = [] for i_fit, fit_th in enumerate(closures_th): # some of these could be done outside of loop, but easier to do here. - th_replicas = fit_th.error_members + th_replicas = fit_th.data th_central = np.mean(th_replicas, axis=-1) dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy() dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy() diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py index 20f6aae386..998b46066d 100644 --- a/validphys2/src/validphys/correlations.py +++ b/validphys2/src/validphys/correlations.py @@ -60,13 +60,13 @@ def _basic_obs_obs_correlation(obsarr1, obsarr2): return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0)) -#TODO: Implement for other error types. @check_pdf_is_montecarlo def obs_pdf_correlations(pdf, results, xplotting_grid): """Return the correlations between each point in a dataset and the PDF values on a grid of (x,f) points in a format similar to `xplotting_grid`.""" _ , th = results - corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th.error_members) + pdf_replicas = pdf.stats_class(xplotting_grid.grid_values).error_members() + corrs = _basic_obs_pdf_correlation(pdf_replicas, th.error_members) return xplotting_grid._replace(grid_values=corrs) diff --git a/validphys2/src/validphys/tests/test_closuretest.py b/validphys2/src/validphys/tests/test_closuretest.py index 181d05e550..8267424a67 100644 --- a/validphys2/src/validphys/tests/test_closuretest.py +++ b/validphys2/src/validphys/tests/test_closuretest.py @@ -20,6 +20,10 @@ def __init__(self, central_value, rawdata=None): def __len__(self,): return self.ndata + @property + def error_members(self): + return self.data[:, 1:] + N_DATA = 5 N_REPLICAS = 10 From a86fbc5075ed0f61a78c303ce2f7e64fa1902825 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 27 Jan 2022 19:24:02 +0100 Subject: [PATCH 7/8] fix vpinterface --- n3fit/src/n3fit/tests/test_vpinterface.py | 2 +- n3fit/src/n3fit/vpinterface.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/tests/test_vpinterface.py b/n3fit/src/n3fit/tests/test_vpinterface.py index 5a90d6cbf9..90e95badd4 100644 --- a/n3fit/src/n3fit/tests/test_vpinterface.py +++ b/n3fit/src/n3fit/tests/test_vpinterface.py @@ -40,7 +40,7 @@ def test_N3PDF(members, layers): xsize = np.random.randint(2, 20) xx = np.random.rand(xsize) n3pdf = generate_n3pdf(layers=layers, members=members) - assert len(n3pdf) == members + 1 + assert len(n3pdf) == members assert n3pdf.stats_class == MCStats assert n3pdf.load() is n3pdf w = n3pdf.get_nn_weights() diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index 28af0a0469..abda0aaf4a 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -74,7 +74,7 @@ def __init__(self, pdf_models, fit_basis=None, name="n3fit"): # The number of members will be minimum 2, needed for legacy compatibility # Note that the member 0 does not exist as an actual model as it corresponds # to the average of all others - self._info = {"ErrorType": "replicas", "NumMembers": len(self._models) + 1} + self._info = {"ErrorType": "replicas", "NumMembers": len(self._models)} @property def stats_class(self): From 95c8f929494c816d12c9ffedafa5b7dc9c941968 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 28 Jan 2022 12:11:44 +0100 Subject: [PATCH 8/8] miscelaneous fixes --- validphys2/src/validphys/correlations.py | 2 +- validphys2/src/validphys/deltachi2.py | 4 +- validphys2/src/validphys/eff_exponents.py | 8 ++-- validphys2/src/validphys/lhapdfset.py | 18 ++++++--- validphys2/src/validphys/mc2hessian.py | 5 +-- validphys2/src/validphys/pdfgrids.py | 46 ++++++++++++++++++----- validphys2/src/validphys/pdfplots.py | 3 +- validphys2/src/validphys/results.py | 4 +- validphys2/src/validphys/sumrules.py | 2 +- 9 files changed, 62 insertions(+), 30 deletions(-) diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py index 998b46066d..50b21cf568 100644 --- a/validphys2/src/validphys/correlations.py +++ b/validphys2/src/validphys/correlations.py @@ -67,7 +67,7 @@ def obs_pdf_correlations(pdf, results, xplotting_grid): _ , th = results pdf_replicas = pdf.stats_class(xplotting_grid.grid_values).error_members() corrs = _basic_obs_pdf_correlation(pdf_replicas, th.error_members) - return xplotting_grid._replace(grid_values=corrs) + return xplotting_grid.copy_grid(grid_values=corrs) corrpair_results = collect('results', ['corrpair']) diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py index 65f0a9f8a8..503a514df1 100644 --- a/validphys2/src/validphys/deltachi2.py +++ b/validphys2/src/validphys/deltachi2.py @@ -152,8 +152,8 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid): pos_grid = xplotting_grid.grid_values[pos_mask] neg_grid = xplotting_grid.grid_values[neg_mask] - pos_xplotting_grid = xplotting_grid._replace(grid_values=pos_grid) - neg_xplotting_grid = xplotting_grid._replace(grid_values=neg_grid) + pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid) + neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid) return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid] diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index 32bd57abfb..fcb40aa466 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -73,7 +73,7 @@ def alpha_eff(pdf: PDF, *, warnings.simplefilter('ignore', RuntimeWarning) alphaGrid_values = -np.log(abs(pdfGrid_values/xGrid))/np.log(xGrid) alphaGrid_values[alphaGrid_values == - np.inf] = np.nan # when PDF_i =0 - alphaGrid = pdfGrid._replace(grid_values=alphaGrid_values) + alphaGrid = pdfGrid.copy_grid(grid_values=alphaGrid_values) return alphaGrid @check_positive('Q') @@ -117,7 +117,7 @@ def beta_eff(pdf, *, warnings.simplefilter('ignore', RuntimeWarning) betaGrid_values = np.log(abs(pdfGrid_values/xGrid))/np.log(1-xGrid) betaGrid_values[betaGrid_values == -np.inf] = np.nan # when PDF_i =0 - betaGrid = pdfGrid._replace(grid_values=betaGrid_values) + betaGrid = pdfGrid.copy_grid(grid_values=betaGrid_values) return betaGrid # .grid_values @@ -348,8 +348,8 @@ def next_effective_exponents_table( eff_exp_data = [] - alphastats = pdf.stats_class(alpha_effs.grid_values) - betastats = pdf.stats_class(beta_effs.grid_values) + alphastats = alpha_effs.stats_gv + betastats = beta_effs.stats_gv with warnings.catch_warnings(): warnings.simplefilter('ignore', RuntimeWarning) diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py index e45ca56acb..ec9706f618 100644 --- a/validphys2/src/validphys/lhapdfset.py +++ b/validphys2/src/validphys/lhapdfset.py @@ -83,11 +83,19 @@ def central_member(self): """Returns a reference to member 0 of the PDF list""" return self._lhapdf_set[0] - def xfxQ(self, x, q, member_idx, flavour): - """Return the PDF value for one single point for one single member""" - member_pdf = self.members[member_idx] - res = member_pdf.xfxQ(x, q) - return res[flavour] + def xfxQ(self, x, Q, n, fl): + """Return the PDF value for one single point for one single member + Parameters + --------- + x: float + Q: float + n: int + fl: str + """ + member_pdf = self.members[n] + res = member_pdf.xfxQ(x, Q) + # Use `.get` so that non-existing flavours (top / antitop) just return 0 + return res.get(fl, 0.0) @property def flavors(self): diff --git a/validphys2/src/validphys/mc2hessian.py b/validphys2/src/validphys/mc2hessian.py index 4e045ced64..a6d667d491 100644 --- a/validphys2/src/validphys/mc2hessian.py +++ b/validphys2/src/validphys/mc2hessian.py @@ -108,9 +108,8 @@ def _create_mc2hessian(pdf, Q, xgrid, Neig, output_path, name=None): def _get_X(pdf, Q, xgrid, reshape=False): pdf_grid = xplotting_grid(pdf, Q, xgrid=xgrid) - pdf_grid_values = pdf_grid.grid_values - replicas = pdf_grid_values - mean = pdf_grid_values.mean(axis=0) + replicas = pdf_grid.stats_gv.error_members() + mean = pdf_grid.stats_gv.central_value() Xt = replicas - mean if reshape: Xt = Xt.reshape(Xt.shape[0], Xt.shape[1] * Xt.shape[2]) diff --git a/validphys2/src/validphys/pdfgrids.py b/validphys2/src/validphys/pdfgrids.py index 0f9061c943..103f73516e 100644 --- a/validphys2/src/validphys/pdfgrids.py +++ b/validphys2/src/validphys/pdfgrids.py @@ -3,6 +3,7 @@ to facilitate plotting and analysis. """ from collections import namedtuple +from dataclasses import dataclass import numbers import numpy as np @@ -11,7 +12,7 @@ from reportengine import collect from reportengine.checks import make_argcheck, CheckError, check_positive, check -from validphys.core import PDF +from validphys.core import PDF, Stats from validphys.gridvalues import (evaluate_luminosity) from validphys.pdfbases import (Basis, check_basis) from validphys.checks import check_pdf_normalize_to, check_xlimits @@ -37,9 +38,30 @@ def xgrid(xmin:numbers.Real=1e-5, xmax:numbers.Real=1, return (scale, arr) - -XPlottingGrid = namedtuple('XPlottingGrid', ('Q', 'basis', 'flavours', 'xgrid', - 'grid_values', 'scale')) +@dataclass +class XPlottingGrid: + """DataClass holding the value of the PDF at the specified + values of x, Q and flavour. + It also exposes a `stats_gv` attribute with a `Stats` instance + of the raw `grid_values` object in order to compute statistical + estimators in a sensible manner. + """ + Q: float + basis: (str, Basis) + flavours: (list, tuple, type(None)) + xgrid: np.ndarray + grid_values: np.ndarray + scale: str + stats_gv: Stats + + def copy_grid(self, grid_values=None): + """Create a copy of the grid with a different set of values""" + new_values = {} + if grid_values is not None: + new_values["grid_values"] = grid_values + new_values["stats_gv"] = self.stats_gv.__class__(grid_values) + new_variables = {**self.__dict__, **new_values} + return self.__class__(**new_variables) @make_argcheck(check_basis) @@ -55,6 +77,9 @@ def xplotting_grid(pdf:PDF, Q:(float,int), xgrid=None, basis:(str, Basis)='flavo If None, the defaults for that basis will be selected. Q: The PDF scale in GeV. + + The results are computed for all relevant PDF members and wrapped in a + stats class, to compute statistics regardless of the error_type. """ #Make usable outside reportengine checked = check_basis(basis, flavours) @@ -71,10 +96,11 @@ def xplotting_grid(pdf:PDF, Q:(float,int), xgrid=None, basis:(str, Basis)='flavo raise TypeError(f"Invalid xgrid {xgrid!r}") gv = basis.grid_values(pdf, flavours, xgrid, Q) #Eliminante Q axis - #TODO: wrap this in pdf.stats_class? gv = gv.reshape(gv.shape[:-1]) + # Transitional + gv_stats = pdf.stats_class(gv) - res = XPlottingGrid(Q, basis, flavours, xgrid, gv, scale) + res = XPlottingGrid(Q, basis, flavours, xgrid, gv, scale, gv_stats) return res xplotting_grids = collect(xplotting_grid, ('pdfs',)) @@ -242,7 +268,7 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None for grid, pdf in zip(xplotting_grids, pdfs): if pdf == pdfs[normalize_to]: - newgrid = grid._replace(grid_values=np.zeros(shape=(grid.grid_values.shape[1], grid.grid_values.shape[2]))) + newgrid = grid.copy_grid(grid_values=np.zeros(shape=(grid.grid_values.shape[1], grid.grid_values.shape[2]))) newgrids.append(newgrid) continue @@ -253,7 +279,7 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None # the distance definition distance = np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2)) - newgrid = grid._replace(grid_values=distance) + newgrid = grid.copy_grid(grid_values=distance) newgrids.append(newgrid) return newgrids @@ -281,7 +307,7 @@ def variance_distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(No for grid, pdf in zip(xplotting_grids, pdfs): if pdf == pdfs[normalize_to]: - newgrid = grid._replace(grid_values=np.zeros(shape=(grid.grid_values.shape[1], grid.grid_values.shape[2]))) + newgrid = grid.copy_grid(grid_values=np.zeros(shape=(grid.grid_values.shape[1], grid.grid_values.shape[2]))) newgrids.append(newgrid) continue @@ -293,7 +319,7 @@ def variance_distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(No # the distance definition variance_distance = np.sqrt((sg1**2-sg2**2)**2/(s1+s2)) - newgrid = grid._replace(grid_values=variance_distance) + newgrid = grid.copy_grid(grid_values=variance_distance) newgrids.append(newgrid) return newgrids diff --git a/validphys2/src/validphys/pdfplots.py b/validphys2/src/validphys/pdfplots.py index 74397e2b83..ce926d748f 100644 --- a/validphys2/src/validphys/pdfplots.py +++ b/validphys2/src/validphys/pdfplots.py @@ -78,8 +78,7 @@ def fp_error(tp, flag): for grid in self._xplotting_grids: newvalues = grid.grid_values/normvals #newgrid is like the old grid but with updated values - newgrid = type(grid)(**{**grid._asdict(), - 'grid_values':newvalues}) + newgrid = grid.copy_grid(grid_values=newvalues) newgrids.append(newgrid) return newgrids diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 0b609273c5..e079d2f984 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -167,7 +167,7 @@ def from_convolution(cls, pdf, dataset): class PositivityResult(StatsResult): @classmethod def from_convolution(cls, pdf, posset): - loaded_pdf = pdf.load() + loaded_pdf = pdf.legacy_load() loaded_pos = posset.load() data = loaded_pos.GetPredictions(loaded_pdf) stats = pdf.stats_class(data.T) @@ -714,7 +714,7 @@ def closure_shifts(experiments_index, fit, use_cuts, experiments): def positivity_predictions_data_result(pdf, posdataset): - """Return an object containing the values of the positivuty observable.""" + """Return an object containing the values of the positivity observable.""" return PositivityResult.from_convolution(pdf, posdataset) diff --git a/validphys2/src/validphys/sumrules.py b/validphys2/src/validphys/sumrules.py index c0d8c3da93..702ef350fd 100644 --- a/validphys2/src/validphys/sumrules.py +++ b/validphys2/src/validphys/sumrules.py @@ -125,7 +125,7 @@ def f(x, lpdf, irep, Q): def _sum_rules(rules_dict, lpdf, Q): """Compute a SumRulesGrid from the loaded PDF, at Q""" - nmembers = lpdf.n_members() + nmembers = lpdf.n_members #TODO: Write this in something fast #If nothing else, at least allocate and store the result contiguously res = np.zeros((len(rules_dict), nmembers))