Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
36cf2b8
Separated dataset covariance matrix generation, new function for gene…
nhartland Feb 14, 2018
0e891d0
Add a basic form of the chi2 calculation without using any special types
nhartland Feb 14, 2018
bf087a0
Better handling of very small weights in N_eff:=
nhartland Feb 15, 2018
4d68741
Empty ThPredictions constructor
nhartland Feb 15, 2018
fd393b8
Separated dataset covariance matrix generation, new function for gene…
nhartland Feb 14, 2018
0ad5fd9
Add a basic form of the chi2 calculation without using any special types
nhartland Feb 14, 2018
51eb137
Better handling of very small weights in N_eff:=
nhartland Feb 15, 2018
65e5401
Empty ThPredictions constructor
nhartland Feb 15, 2018
7e58eb5
Merge branch 'covmat_mods' of github.com:NNPDF/nnpdf into covmat_mods
nhartland Mar 6, 2018
1a3c2c5
Separated dataset covariance matrix generation, new function for gene…
nhartland Feb 14, 2018
e5b03c9
Add a basic form of the chi2 calculation without using any special types
nhartland Feb 14, 2018
2662a27
Better handling of very small weights in N_eff:=
nhartland Feb 15, 2018
4fa5732
Empty ThPredictions constructor
nhartland Feb 15, 2018
2bcd6d3
Separated dataset covariance matrix generation, new function for gene…
nhartland Feb 14, 2018
35fe396
Add a basic form of the chi2 calculation without using any special types
nhartland Feb 14, 2018
c69fd87
Merge branch 'covmat_mods' of github.com:NNPDF/nnpdf into covmat_mods
nhartland Mar 6, 2018
241956a
Cosmetics
nhartland Mar 7, 2018
c483bad
gsl matrix view version of SqrtCovMat
nhartland Mar 7, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions libnnpdf/src/NNPDF/chisquared.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,10 @@
#include "dataset.h"

namespace NNPDF{
//! Build the (ndat x ndat) covariance matrix of a CommonData set. `t0` supplies one
//! prediction per data point and scales the multiplicative systematics; a LengthError
//! is thrown if its size does not match the number of data points.
matrix<double> ComputeCovMat(CommonData const& cd, std::vector<double> const& t0);
//! Cholesky-decompose `inmatrix` with GSL and return the result as a new matrix.
//! Throws LengthError on an empty input and RuntimeException if GSL reports a failure.
matrix<double> ComputeSqrtMat(matrix<double> const& inmatrix);
//! chi2 computation on plain buffers, without Experiment/DataSet types: `data` is the
//! data vector, `L` the square-root covariance matrix, `theory` the predictions and
//! `chi2` the output buffer.
//! NOTE(review): presumably `theory` holds nDat*nMem values and `chi2` nMem values — confirm against the definition.
void ComputeChi2_basic(int const nDat, int const nMem,
const double* data, matrix<double> const& L,
real *const& theory, real *chi2);
//! Convenience wrapper: reads the sqrt-covariance matrix, data pointer and number of
//! data points from `set` and forwards to ComputeChi2_basic.
template<class T> void ComputeChi2(const T*, int const&, real *const&, real *);
}
7 changes: 7 additions & 0 deletions libnnpdf/src/NNPDF/thpredictions.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ namespace NNPDF

ThPredictions(const PDFSet*, const PDFSet*, const FKTable*); //!< Different-beam constructor

// "Empty" constructor: builds a ThPredictions from its metadata only (PDF name,
// set name, number of PDF members, number of data points and PDF error type),
// without taking a PDFSet or FKTable to compute predictions from.
ThPredictions(std::string pdfname,
std::string setname,
int nPDF,
int nDat,
PDFSet::erType);

ThPredictions(const ThPredictions&); //!< Copy-constructor
friend void swap(ThPredictions&, ThPredictions&);
ThPredictions& operator=(ThPredictions); //!< Copy-assignment
Expand Down
10 changes: 4 additions & 6 deletions libnnpdf/src/NNPDF/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,10 @@ std::string joinpath(const std::initializer_list<std::string> &list);
size_t const& size(size_t dim) const { return _size[dim]; } //!< Returns the (row,col) size pair.
T& operator()(size_t i, size_t j) { return _data[i*_size[1]+j]; }
T const& operator()(size_t i, size_t j) const { return _data[i*_size[1]+j]; }
//TODO: Does this have to be const? In any case there
//should be a const version.
T const * data () const {return _data.data();} //!< Return the underlying buffer.

// Data access
T * data () {return _data.data();} //!< Return the underlying buffer.
T const * data () const {return _data.data();} //!< Return the underlying buffer (const version).

private:
std::array<size_t, 2> _size; //!< the dimension pair
Expand Down Expand Up @@ -181,9 +182,6 @@ std::string joinpath(const std::initializer_list<std::string> &list);
real ComputeMom(int const& n, const real *x, int const& m);//!< Compute mth moment of distribution
void Compute68cl(std::vector<real> const& x, real &up, real &dn);//!< Compute the 68% c.l.
void Compute95cl(std::vector<real> const& x, real &up, real &dn);//!< Compute the 95% c.l.

void CholeskyDecomposition(matrix<double> const& inmatrix, matrix<double> & sqrtmat);

}

/*! @} */
94 changes: 89 additions & 5 deletions libnnpdf/src/chisquared.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,91 @@
// Authors: Nathan Hartland, n.p.hartland@ed.ac.uk
// Stefano Carrazza, stefano.carrazza@mi.infn.it

#include "NNPDF/exceptions.h"
#include "NNPDF/chisquared.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_linalg.h"

namespace NNPDF
{
/**
* Generate covariance matrix from CommonData and a t0 vector
*/
matrix<double> ComputeCovMat(CommonData const& cd, std::vector<double> const& t0)
{
const int ndat = cd.GetNData();
const int nsys = cd.GetNSys();

template<class T>
void ComputeChi2(const T* set, int const& nMem, real *const& theory, real *chi2)
if (t0.size() != ndat)
throw LengthError("ComputeCovMat","invalid number of points in t0 vector!");

auto CovMat = NNPDF::matrix<double>(ndat, ndat);
for (int i = 0; i < ndat; i++)
{
for (int j = 0; j < ndat; j++)
{
double sig = 0.0;
double signor = 0.0;

if (i == j)
sig += pow(cd.GetStat(i),2); // stat error

for (int l = 0; l < nsys; l++)
{
sysError const& isys = cd.GetSys(i,l);
sysError const& jsys = cd.GetSys(j,l);
if (isys.name != jsys.name)
throw RuntimeException("ComputeCovMat", "Inconsistent naming of systematics");
if (isys.name == "SKIP")
continue;
const bool is_correlated = ( isys.name != "UNCORR" && isys.name !="THEORYUNCORR");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we somehow avoid these checks being run ndata**2*nsys times? We have plots showing that it is a huge bottleneck for anything relying on these computations. Also, it really looks like we could look only at the systematics of the first point, and do away with the i==j check?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that feels like a different battle to me, the battle for #25

if (i == j || is_correlated)
switch (isys.type)
{
case ADD: sig += isys.add *jsys.add; break;
case MULT: signor += isys.mult*jsys.mult; break;
case UNSET: throw RuntimeException("ComputeCovMat", "UNSET systype encountered");
}
}

CovMat(i, j) = sig + signor*t0[i]*t0[j]*1e-4;
}
}
return CovMat;
}

matrix<double> ComputeSqrtMat(matrix<double> const& inmatrix)
{
matrix<double> const& L = set->GetSqrtCov();
const double* data = set->GetData();
const int nDat = set->GetNData();
const size_t n = inmatrix.size(0);
if (n <= 0)
throw LengthError("CholeskyDecomposition","attempting a decomposition of an empty matrix!");

gsl_matrix_const_view inmatrix_view = gsl_matrix_const_view_array(inmatrix.data(), n, n);
const gsl_matrix *inmatrix_gsl = &(inmatrix_view.matrix);

matrix<double> sqrtmat(n,n);
gsl_matrix_view sqrtmat_view = gsl_matrix_view_array(sqrtmat.data(), n, n);
gsl_matrix *sqrtmat_gsl = &(sqrtmat_view.matrix);

// Copy and decompose inmatrix
const int copy = gsl_matrix_memcpy (sqrtmat_gsl, inmatrix_gsl);
if (copy != 0 ) throw RuntimeException("CholeskyDecomposition", "Error encountered in gsl matrix copy");
const int decomp = gsl_linalg_cholesky_decomp(sqrtmat_gsl);
if (decomp != 0 ) throw RuntimeException("CholeskyDecomposition", "Error encountered in gsl decomposition");

// Zero the strictly upper-triangular part of the matrix. gsl_linalg_cholesky_decomp
// only guarantees the diagonal and lower triangle hold L; the entries above the
// diagonal are left non-zero, so they must be cleared for sqrtmat to be a proper
// lower-triangular factor (the previous CholeskyDecomposition copied only j <= i).
for (size_t i = 0; i < n; i++)
  for (size_t j = i + 1; j < n; j++)
    sqrtmat(i, j) = 0;

return sqrtmat;
}

// TODO to sort this out, need to make data and theory vectors
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually starting to think that vectors everywhere is not such a good idea (though we could have it as a higher level interface), especially in view of wanting to use something like this:

https://arrow.apache.org/docs/python/plasma.html

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what that is, nor why we would want to use it.

What would you use other than vectors? Stick to plain old pointers?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think both validphys and nnfit would benefit a lot from sharing memory between processes (like fktables). I think fktables are big enough that you will not see the performance difference in masking the train/valid split as opposed to actually slicing the tables. For vp the cost of initializing the same pdfs and fktables in several processes often offsets the advantages of the parallel mode, and so it would be good if these things were loaded in shared memory once. That thing seems like a convenient way to do just that, but then you must control the allocator, which is a pain to do with the std containers.

void ComputeChi2_basic(int const nDat, int const nMem,
const double* data, matrix<double> const& L,
real *const& theory, real *chi2)
{
// Forward solve Lx = diffs
double x[nDat];
for (int n = 0; n < nMem; n++)
Expand All @@ -31,6 +104,17 @@ namespace NNPDF
return;
}

template<class T>
void ComputeChi2(const T* set, int const& nMem, real *const& theory, real *chi2)
{
matrix<double> const& L = set->GetSqrtCov();
const double* data = set->GetData();
const int nDat = set->GetNData();

ComputeChi2_basic(nDat, nMem, data, L, theory, chi2);
return;
}

template void ComputeChi2<Experiment>(const Experiment* set, int const& nMem, real *const& theory, real *chi2);
template void ComputeChi2<DataSet>(const DataSet* set, int const& nMem, real *const& theory, real *chi2);
}
35 changes: 3 additions & 32 deletions libnnpdf/src/dataset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>

#include "NNPDF/dataset.h"
#include "NNPDF/chisquared.h"
#include "NNPDF/fastkernel.h"
#include "NNPDF/thpredictions.h"
#include "NNPDF/randomgenerator.h"
Expand Down Expand Up @@ -70,38 +71,8 @@ DataSet::~DataSet()
*/
void DataSet::GenCovMat() const
{
fCovMat.clear();
fSqrtCov.clear();
fCovMat.resize(fNData, fNData, 0);
fSqrtCov.resize(fNData, fNData, 0);

if (fNData <= 0)
throw LengthError("DataSet::GenCovMat","invalid number of datapoints!");

for (int i = 0; i < fNData; i++)
for (int j = 0; j < fNData; j++)
{
double sig = 0.0;
double signor = 0.0;

if (i == j)
sig += fStat[i]*fStat[i]; // stat error

for (int l = 0; l < fNSys; l++)
if (fSys[i][l].name.compare("SKIP")!=0)
if (i == j || ( fSys[i][l].name.compare("UNCORR")!=0 && fSys[i][l].name.compare("THEORYUNCORR")!=0))
switch (fSys[i][l].type)
{
case ADD: sig += fSys[i][l].add*fSys[j][l].add; break; // additive systematics
case MULT: signor += fSys[i][l].mult*fSys[j][l].mult; break; // multiplicative systematics
case UNSET: throw RuntimeException("DataSet::GenCovMat", "UNSET systype encountered");
}

fCovMat(i, j) = sig + signor*fT0Pred[i]*fT0Pred[j]*1e-4;
}

// Compute sqrt of covmat
CholeskyDecomposition(fCovMat, fSqrtCov);
fCovMat = ComputeCovMat(*this, fT0Pred);
fSqrtCov = ComputeSqrtMat(fCovMat);
}

void DataSet::RescaleErrors(const double mult)
Expand Down
5 changes: 2 additions & 3 deletions libnnpdf/src/experiments.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <numeric>

#include "NNPDF/experiments.h"
#include "NNPDF/chisquared.h"
#include "NNPDF/pdfset.h"
#include "NNPDF/dataset.h"
#include "NNPDF/thpredictions.h"
Expand Down Expand Up @@ -427,9 +428,7 @@ void Experiment::PullData()
void Experiment::GenCovMat()
{
fCovMat.clear();
fSqrtCov.clear();
fCovMat.resize(fNData, fNData, 0);
fSqrtCov.resize(fNData, fNData, 0);

for (int i = 0; i < fNData; i++) {
// Diagonal case
Expand Down Expand Up @@ -484,7 +483,7 @@ void Experiment::GenCovMat()
}
}

CholeskyDecomposition(fCovMat, fSqrtCov);
fSqrtCov = ComputeSqrtMat(fCovMat);
}

void Experiment::ExportCovMat(string filename)
Expand Down
14 changes: 14 additions & 0 deletions libnnpdf/src/thpredictions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,20 @@ fEtype(o.fEtype)
fObs[i] = o.fObs[i];
}

/**
 * Empty Constructor
 *
 * Builds a ThPredictions from metadata only; no PDFSet or FKTable is involved.
 * The observable buffer fObs is allocated with nPDF*nDat entries and
 * value-initialised by the trailing `()` (i.e. zero for an arithmetic `real`),
 * and the convolution time fTconv is set to 0.
 *
 * \param pdfname name stored in fPDFName
 * \param setname name stored in fSetName
 * \param nPDF    number of PDF members, stored in fNpdf
 * \param nDat    number of data points, stored in fNData
 * \param erty    PDF error type, stored in fEtype
 *
 * NOTE(review): nPDF*nDat is evaluated in int and is not checked for negative
 * values or overflow before being used as the allocation size.
 * NOTE(review): members are initialised in class declaration order, which may
 * differ from the order written below — confirm against thpredictions.h.
 */
ThPredictions::ThPredictions(std::string pdfname, std::string setname, int nPDF, int nDat, PDFSet::erType erty):
fObs(new real[nPDF*nDat]()),
fTconv(0),
fNpdf(nPDF),
fNData(nDat),
fPDFName(pdfname),
fSetName(setname),
fEtype(erty)
{
}


void NNPDF::swap(ThPredictions& lhs, ThPredictions& rhs)
{
Expand Down
19 changes: 0 additions & 19 deletions libnnpdf/src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -433,23 +433,4 @@ std::string joinpath(const std::initializer_list<std::string> &list)
return sum;
}

// *********** Cholesky decomposition of a matrix ***************

// Cholesky-decompose `inmatrix` with GSL and write the factor into `sqrtmat`.
// Only the diagonal and lower triangle (j <= i) of `sqrtmat` are written; any
// other entries keep whatever value the caller left there.
// Throws LengthError when the first dimension of `inmatrix` is zero.
// NOTE(review): assumes `inmatrix` is square and `sqrtmat` is already sized to at
// least n x n — neither is checked here.
void CholeskyDecomposition(const matrix<double> &inmatrix, matrix<double> &sqrtmat)
{
const size_t n = inmatrix.size(0);
// n is unsigned, so this test is effectively n == 0.
if (n <= 0)
throw LengthError("CholeskyDecomposition","attempting a decomposition of an empty matrix!");
// Work on a temporary GSL copy so that `inmatrix` is left untouched.
gsl_matrix* mat = gsl_matrix_calloc(n, n);
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
gsl_matrix_set(mat, i, j, inmatrix(i, j));

// NOTE(review): the status code is stored but never inspected, so a failed
// decomposition is not reported by this function.
const int decomp = gsl_linalg_cholesky_decomp(mat);
// Copy back only the lower triangle, diagonal included.
for (int i = 0; i < n; i++)
for (int j = 0; j <= i; j++)
sqrtmat(i, j) = gsl_matrix_get(mat, i, j);
gsl_matrix_free (mat);
}

}
2 changes: 1 addition & 1 deletion validphys2/src/validphys/reweighting.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def nnpdf_weights(chi2_data_for_reweighting_experiments):
def effective_number_of_replicas(w):
N = len(w)
w = w*N/np.sum(w)
return np.exp(np.nansum(w*np.log(N/w))/N)
return np.exp(np.nansum(w*np.log(N)-w*np.log(w))/N)

@table
def reweighting_stats(pdf, nnpdf_weights, p_alpha_study):
Expand Down