From d1cb21491d997f0063c6a382aff76af4a1b7c586 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Thu, 6 Jun 2019 13:05:28 +0100 Subject: [PATCH 1/4] changed regressions and weights tests to use API --- validphys2/src/validphys/tests/conftest.py | 161 +++++++++--------- .../src/validphys/tests/test_regressions.py | 79 ++++----- .../src/validphys/tests/test_weights.py | 10 +- 3 files changed, 116 insertions(+), 134 deletions(-) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index a8a0f5a172..2d84f5ae44 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -24,90 +24,89 @@ def tmp(tmpdir): return pathlib.Path(tmpdir) @pytest.fixture(scope='module') -def data(): - l = Loader() - dataset_inputs = [{'name': 'NMC'}, - {'name':'ATLASTTBARTOT', 'cfac':['QCD']}, - {'name':'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sysnum':10}] - ds = [l.check_dataset(**x, theoryid=162, cuts=None) for x in dataset_inputs] - exps = [ExperimentSpec(x.name, [x]) for x in ds] - pdf = l.check_pdf("NNPDF31_nnlo_as_0118") - return pdf, exps +def data_config(): + experiment_list = [ + { + 'experiment': 'NMC', + 'datasets': [{'dataset': 'NMC'}]}, + { + 'experiment': 'ATLASTTBARTOT', + 'datasets': [{'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}]}, + { + 'experiment': 'CMSZDIFF12', + 'datasets': [{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]} + ] + config_dict = dict( + pdf="NNPDF31_nnlo_as_0118", + use_cuts='nocuts', + experiments=experiment_list, + theoryid=162, + use_t0=False, + use_fitthcovmat=False + ) + return config_dict @pytest.fixture(scope='module') -def exps_covariance_matrices(data): - """produces a list of covariance matrix outputs for each experiment""" - _, exps = data - covs = [results.experiment_covariance_matrix(exp, False, None) for exp in exps] - return covs +def data_witht0_config(): + experiment_list = [ + { + 'experiment': 'NMC', + 'datasets': [{'dataset': 'NMC'}]}, + { + 
'experiment': 'ATLASTTBARTOT', + 'datasets': [{'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}]}, + { + 'experiment': 'CMSZDIFF12', + 'datasets': [{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]} + ] + config_dict = dict( + pdf="NNPDF31_nnlo_as_0118", + use_cuts='nocuts', + experiments=experiment_list, + theoryid=162, + use_t0=True, + t0pdfset="NNPDF31_nnlo_as_0118", + use_fitthcovmat=False + ) + return config_dict @pytest.fixture(scope='module') -def t0_exps_covariance_matrices(data): - """produces a list of covariance matrix outputs for each experiment""" - pdf, exps = data - covs = [results.experiment_covariance_matrix(exp, False, pdf) for exp in exps] - return covs - -def convolution_results_implement(data): - pdf, exps = data - #no theory covmat here - covs = [results.experiment_covariance_matrix(exp, False, pdf) for exp in exps] - return [results.experiment_results(exp, pdf, cov) for exp, cov in zip(exps, covs)] - -@pytest.fixture(scope='module') -def convolution_results(data): - return convolution_results_implement(data) - -@pytest.fixture -def dataset_t0_convolution_results(data): - pdf, exps = data - ds = [x.datasets[0] for x in exps] - covs = [results.covariance_matrix(x, False, pdf) for x in ds] - return [results.results(x, pdf, cov) for x, cov in zip(ds, covs)] - -@pytest.fixture(scope='module') -def single_exp_data(): - l = Loader() - dataset_inputs = [{'name': 'NMC'}, - {'name':'ATLASTTBARTOT', 'cfac':['QCD']}, - {'name':'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sysnum':10}] - ds = [l.check_dataset(**x, theoryid=162, cuts=None) for x in dataset_inputs] - exp = ExperimentSpec('pseudo experiment', ds) - pdf = l.check_pdf("NNPDF31_nnlo_as_0118") - return pdf, exp - -@pytest.fixture(scope='module') -def dataset_convolution_results(single_exp_data): - pdf, exp = single_exp_data - covs = [results.covariance_matrix(ds, False, pdf) for ds in exp.datasets] - return [results.results(ds, pdf, cov) for ds, cov in zip(exp.datasets, covs)] - 
-@pytest.fixture(scope='module') -def dataset_chi2data(dataset_convolution_results): - return [results.abs_chi2_data(r) for r in dataset_convolution_results] - -def chi2data_implement(convolution_results): - return [results.abs_chi2_data_experiment(r) for r in convolution_results] - -@pytest.fixture(scope='module') -def chi2data(convolution_results): - return chi2data_implement(convolution_results) - -@pytest.fixture(scope='module') -def weighted_data(): - l = Loader() - ds = l.check_dataset(name='NMC', theoryid=162, cuts=None) - wds = l.check_dataset(name='NMC', theoryid=162, cuts=None, weight=100) - exp = ExperimentSpec('NMC Experiment', [ds]) - wexp = ExperimentSpec('Weighted', [wds]) - pdf = l.check_pdf("NNPDF31_nnlo_as_0118") - exps = [exp, wexp] - return pdf, exps - -@pytest.fixture(scope='module') -def convolution_results_with_weights(weighted_data): - return convolution_results_implement(weighted_data) +def data_singleexp_witht0_config(): + experiment_list = [ + { + 'experiment': 'pseudo experiment', + 'datasets': [ + {'dataset': 'NMC'}, + {'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}, + {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]}] + config_dict = dict( + pdf="NNPDF31_nnlo_as_0118", + use_cuts='nocuts', + experiments=experiment_list, + theoryid=162, + use_t0=True, + t0pdfset="NNPDF31_nnlo_as_0118", + use_fitthcovmat=False + ) + return config_dict @pytest.fixture(scope='module') -def weighted_chi2data(convolution_results_with_weights): - return chi2data_implement(convolution_results_with_weights) +def weighted_data_witht0_config(): + experiment_list = [ + { + 'experiment': 'NMC Experiment', + 'datasets': [{'dataset': 'NMC'}]}, + { + 'experiment': 'Weighted', + 'datasets': [{'dataset': 'NMC', 'weight': 100}]}, + ] + config_dict = dict( + pdf="NNPDF31_nnlo_as_0118", + use_cuts='nocuts', + experiments=experiment_list, + theoryid=162, + use_t0=True, + t0pdfset="NNPDF31_nnlo_as_0118", + use_fitthcovmat=False + ) + return config_dict diff --git 
a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index ae312b2a4f..b4dea82c2b 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -17,6 +17,7 @@ import NNPDF from validphys import results +from validphys.api import API from validphys.tableloader import (parse_exp_mat, load_perreplica_chi2_table, sane_load, load_fits_chi2_table) @@ -51,13 +52,10 @@ def f_(*args, **kwargs): return decorator @make_table_comp(parse_exp_mat) -def test_expcovmat(data, exps_covariance_matrices): - _, exps = data - eindex = results.experiments_index(exps) - mat = results.experiments_covmat_no_table( - exps, eindex, exps_covariance_matrices) +def test_expcovmat(data_config): + mat = API.experiments_covmat_no_table(**data_config) covmats = [] - for exp in exps: + for exp in API.experiments(**data_config): cd = exp.datasets[0].commondata.load() covmats.append(NNPDF.ComputeCovMat(cd, cd.get_cv())) othermat = la.block_diag(*covmats) @@ -65,62 +63,45 @@ def test_expcovmat(data, exps_covariance_matrices): return mat @make_table_comp(parse_exp_mat) -def test_t0covmat(data, t0_exps_covariance_matrices): - _, exps = data - eindex = results.experiments_index(exps) - return results.experiments_covmat_no_table( - exps, eindex, t0_exps_covariance_matrices) +def test_t0covmat(data_witht0_config): + return API.experiments_covmat_no_table(**data_witht0_config) @make_table_comp(parse_exp_mat) -def test_expsqrtcovmat(data, exps_covariance_matrices): - _, exps = data - eindex = results.experiments_index(exps) - return results.experiments_sqrtcovmat(exps, eindex, exps_covariance_matrices) +def test_expsqrtcovmat(data_config): + return API.experiments_sqrtcovmat(**data_config) @make_table_comp(parse_exp_mat) -def test_t0sqrtcovmat(data, t0_exps_covariance_matrices): - _, exps = data - eindex = results.experiments_index(exps) - return results.experiments_sqrtcovmat(exps, eindex, 
t0_exps_covariance_matrices) +def test_t0sqrtcovmat(data_witht0_config): + return API.experiments_sqrtcovmat(**data_witht0_config) @make_table_comp(sane_load) -def test_predictions(convolution_results): - ths = [] - for convolution_result in convolution_results: - dt, th = convolution_result - ths.append(th._rawdata.astype(float)) - th = np.concatenate(ths) - return pd.DataFrame(th, - columns=map(str, - range(th.shape[1]))) +def test_predictions(data_config): + #TODO: change the baseline to just be the `experiment_result_table` + exp_res_tab = API.experiment_result_table_no_table(**data_config) + th = exp_res_tab.iloc[:, 2:].values + return pd.DataFrame(th, columns=map(str, range(th.shape[1]))) @make_table_comp(sane_load) -def test_dataset_t0_predictions(dataset_t0_convolution_results): - ths = [] - for convolution_result in dataset_t0_convolution_results: - dt, th = convolution_result - ths.append(th._rawdata.astype(float)) - th = np.concatenate(ths) - return pd.DataFrame(th, - columns=map(str, - range(th.shape[1]))) +def test_dataset_t0_predictions(data_witht0_config): + #TODO: same as above + exp_res_tab = API.experiment_result_table_no_table(**data_witht0_config) + th = exp_res_tab.iloc[:, 2:].values + return pd.DataFrame(th, columns=map(str, range(th.shape[1]))) @make_table_comp(sane_load) -def test_cv(convolution_results): - cvs = [] - for convolution_result in convolution_results: - dt, _ = convolution_result - cvs.append(dt.central_value) - data_values = np.concatenate(cvs) +def test_cv(data_config): + exp_res_tab = API.experiment_result_table_no_table(**data_config) + data_values = exp_res_tab.iloc[:, 0].values[:, np.newaxis] return pd.DataFrame(data_values, columns=['CV']) @make_table_comp(load_perreplica_chi2_table) -def test_replicachi2data(data, chi2data): - pdf, exps = data - return results.perreplica_chi2_table(exps, chi2data) +def test_replicachi2data(data_witht0_config): + return API.perreplica_chi2_table(**data_witht0_config) 
@make_table_comp(load_fits_chi2_table) -def test_datasetchi2(single_exp_data, dataset_chi2data): - _, exp = single_exp_data - return results.fits_datasets_chi2_table(['test'], [[exp]], dataset_chi2data) +def test_datasetchi2(data_singleexp_witht0_config): + # This is a bit hacky but avoids requiring a fit + exps = API.experiments(**data_singleexp_witht0_config) + chi2s = API.each_dataset_chi2(**data_singleexp_witht0_config) + return results.fits_datasets_chi2_table(['test'], [exps], chi2s) diff --git a/validphys2/src/validphys/tests/test_weights.py b/validphys2/src/validphys/tests/test_weights.py index 4a0ef960d2..f198d1cc52 100644 --- a/validphys2/src/validphys/tests/test_weights.py +++ b/validphys2/src/validphys/tests/test_weights.py @@ -3,14 +3,16 @@ """ import numpy as np -def test_weights_have_same_commondata(weighted_data): - _, exps = weighted_data +from validphys.api import API + +def test_weights_have_same_commondata(weighted_data_witht0_config): + exps = API.experiments(**weighted_data_witht0_config) normal, weighted = exps normalds, weightedds = normal.datasets[0].load(), weighted.datasets[0].load() assert normalds.GetSys(0, 0).mult == weightedds.GetSys(0, 0).mult assert normalds.GetSys(0, 0).add == weightedds.GetSys(0, 0).add -def test_chi2_arithmetic(weighted_chi2data): - normal, weighted = weighted_chi2data +def test_chi2_arithmetic(weighted_data_witht0_config): + normal, weighted = API.experiments_chi2(**weighted_data_witht0_config) assert np.allclose(weighted[0].data/normal[0].data, 100) From a48965c0704bbfee33b585957b29eb828ec06819 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 7 Jun 2019 12:22:52 +0100 Subject: [PATCH 2/4] expanded comment on test_predictions to explain why we deconstruct and reconstruct a pd.DataFrame due to limitations of CSV --- validphys2/src/validphys/tests/test_regressions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/tests/test_regressions.py 
b/validphys2/src/validphys/tests/test_regressions.py index b4dea82c2b..ba294726e9 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -77,20 +77,24 @@ def test_t0sqrtcovmat(data_witht0_config): @make_table_comp(sane_load) def test_predictions(data_config): - #TODO: change the baseline to just be the `experiment_result_table` + # TODO: ideally we would change the baseline to just be corresponding columns + # of `experiment_result_table`, however sane_load expects just a single level + # of column and index - if we use a different format like parquet this could + # be changed. exp_res_tab = API.experiment_result_table_no_table(**data_config) th = exp_res_tab.iloc[:, 2:].values return pd.DataFrame(th, columns=map(str, range(th.shape[1]))) @make_table_comp(sane_load) def test_dataset_t0_predictions(data_witht0_config): - #TODO: same as above + # TODO: As in `test_predictions` exp_res_tab = API.experiment_result_table_no_table(**data_witht0_config) th = exp_res_tab.iloc[:, 2:].values return pd.DataFrame(th, columns=map(str, range(th.shape[1]))) @make_table_comp(sane_load) def test_cv(data_config): + # TODO: As in `test_predictions` exp_res_tab = API.experiment_result_table_no_table(**data_config) data_values = exp_res_tab.iloc[:, 0].values[:, np.newaxis] return pd.DataFrame(data_values, columns=['CV']) From fdb91355b396229d42d0d533b40050d4c76187f0 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 7 Jun 2019 15:43:35 +0100 Subject: [PATCH 3/4] got rid of mass duplications in conftest --- validphys2/src/validphys/tests/conftest.py | 122 +++++++++------------ 1 file changed, 50 insertions(+), 72 deletions(-) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index 2d84f5ae44..f9a8ceef0a 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -23,90 +23,68 @@ def tmp(tmpdir): """A tempdir that is 
manipulated like pathlib Paths""" return pathlib.Path(tmpdir) -@pytest.fixture(scope='module') -def data_config(): - experiment_list = [ - { - 'experiment': 'NMC', - 'datasets': [{'dataset': 'NMC'}]}, - { - 'experiment': 'ATLASTTBARTOT', - 'datasets': [{'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}]}, - { - 'experiment': 'CMSZDIFF12', - 'datasets': [{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]} - ] - config_dict = dict( - pdf="NNPDF31_nnlo_as_0118", +# Here define the default config items like the PDF, theory and experiment specs + +EXPERIMENTS = [ + { + 'experiment': 'NMC', + 'datasets': [{'dataset': 'NMC'}]}, + { + 'experiment': 'ATLASTTBARTOT', + 'datasets': [{'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}]}, + { + 'experiment': 'CMSZDIFF12', + 'datasets': [{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]} + ] + +SINGLE_EXP = [ + { + 'experiment': 'pseudo experiment', + 'datasets': [ + {'dataset': 'NMC'}, + {'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}, + {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]}] + +WEIGHTED_DATA = [ + { + 'experiment': 'NMC Experiment', + 'datasets': [{'dataset': 'NMC'}]}, + { + 'experiment': 'Weighted', + 'datasets': [{'dataset': 'NMC', 'weight': 100}]}, + ] + +PDF = "NNPDF31_nnlo_as_0118" +THEORYID = 162 + +base_config = dict( + pdf=PDF, use_cuts='nocuts', - experiments=experiment_list, - theoryid=162, - use_t0=False, + experiments=EXPERIMENTS, + theoryid=THEORYID, use_fitthcovmat=False ) - return config_dict + +@pytest.fixture(scope='module') +def data_config(): + return base_config @pytest.fixture(scope='module') def data_witht0_config(): - experiment_list = [ - { - 'experiment': 'NMC', - 'datasets': [{'dataset': 'NMC'}]}, - { - 'experiment': 'ATLASTTBARTOT', - 'datasets': [{'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}]}, - { - 'experiment': 'CMSZDIFF12', - 'datasets': [{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]} - ] config_dict = dict( - pdf="NNPDF31_nnlo_as_0118", - 
use_cuts='nocuts', - experiments=experiment_list, - theoryid=162, + **base_config, use_t0=True, - t0pdfset="NNPDF31_nnlo_as_0118", - use_fitthcovmat=False - ) + t0pdfset=PDF) return config_dict @pytest.fixture(scope='module') -def data_singleexp_witht0_config(): - experiment_list = [ - { - 'experiment': 'pseudo experiment', - 'datasets': [ - {'dataset': 'NMC'}, - {'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}, - {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}]}] - config_dict = dict( - pdf="NNPDF31_nnlo_as_0118", - use_cuts='nocuts', - experiments=experiment_list, - theoryid=162, - use_t0=True, - t0pdfset="NNPDF31_nnlo_as_0118", - use_fitthcovmat=False - ) +def data_singleexp_witht0_config(data_witht0_config): + config_dict = dict(data_witht0_config) + config_dict.update({'experiments': SINGLE_EXP}) return config_dict @pytest.fixture(scope='module') -def weighted_data_witht0_config(): - experiment_list = [ - { - 'experiment': 'NMC Experiment', - 'datasets': [{'dataset': 'NMC'}]}, - { - 'experiment': 'Weighted', - 'datasets': [{'dataset': 'NMC', 'weight': 100}]}, - ] - config_dict = dict( - pdf="NNPDF31_nnlo_as_0118", - use_cuts='nocuts', - experiments=experiment_list, - theoryid=162, - use_t0=True, - t0pdfset="NNPDF31_nnlo_as_0118", - use_fitthcovmat=False - ) +def weighted_data_witht0_config(data_witht0_config): + config_dict = dict(data_witht0_config) + config_dict.update({'experiments': WEIGHTED_DATA}) return config_dict From 059af16c26b05e7dce92ec9d894052718b762e63 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 7 Jun 2019 15:48:10 +0100 Subject: [PATCH 4/4] removed unnecessary imports from results --- validphys2/src/validphys/tests/conftest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index f9a8ceef0a..c188299a14 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -8,10 +8,6 @@ import pytest from
hypothesis import settings -from validphys.loader import FallbackLoader as Loader -from validphys.core import ExperimentSpec -from validphys import results - #Adding this here to change the time of deadline from default (200ms) to 1000ms settings.register_profile("extratime", deadline=1000) settings.load_profile("extratime")