-
Notifications
You must be signed in to change notification settings - Fork 14
[Speculative] Modifications used for reweighting #120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
36cf2b8
0e891d0
bf087a0
4d68741
fd393b8
0ad5fd9
51eb137
65e5401
7e58eb5
1a3c2c5
e5b03c9
2662a27
4fa5732
2bcd6d3
35fe396
c69fd87
241956a
c483bad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,18 +5,91 @@ | |
| // Authors: Nathan Hartland, n.p.hartland@ed.ac.uk | ||
| // Stefano Carrazza, stefano.carrazza@mi.infn.it | ||
|
|
||
| #include "NNPDF/exceptions.h" | ||
| #include "NNPDF/chisquared.h" | ||
| #include "gsl/gsl_matrix.h" | ||
| #include "gsl/gsl_linalg.h" | ||
|
|
||
| namespace NNPDF | ||
| { | ||
| /** | ||
| * Generate covariance matrix from CommonData and a t0 vector | ||
| */ | ||
| matrix<double> ComputeCovMat(CommonData const& cd, std::vector<double> const& t0) | ||
| { | ||
| const int ndat = cd.GetNData(); | ||
| const int nsys = cd.GetNSys(); | ||
|
|
||
| template<class T> | ||
| void ComputeChi2(const T* set, int const& nMem, real *const& theory, real *chi2) | ||
| if (t0.size() != ndat) | ||
| throw LengthError("ComputeCovMat","invalid number of points in t0 vector!"); | ||
|
|
||
| auto CovMat = NNPDF::matrix<double>(ndat, ndat); | ||
| for (int i = 0; i < ndat; i++) | ||
| { | ||
| for (int j = 0; j < ndat; j++) | ||
| { | ||
| double sig = 0.0; | ||
| double signor = 0.0; | ||
|
|
||
| if (i == j) | ||
| sig += pow(cd.GetStat(i),2); // stat error | ||
|
|
||
| for (int l = 0; l < nsys; l++) | ||
| { | ||
| sysError const& isys = cd.GetSys(i,l); | ||
| sysError const& jsys = cd.GetSys(j,l); | ||
| if (isys.name != jsys.name) | ||
| throw RuntimeException("ComputeCovMat", "Inconsistent naming of systematics"); | ||
| if (isys.name == "SKIP") | ||
| continue; | ||
| const bool is_correlated = ( isys.name != "UNCORR" && isys.name !="THEORYUNCORR"); | ||
| if (i == j || is_correlated) | ||
| switch (isys.type) | ||
| { | ||
| case ADD: sig += isys.add *jsys.add; break; | ||
| case MULT: signor += isys.mult*jsys.mult; break; | ||
| case UNSET: throw RuntimeException("ComputeCovMat", "UNSET systype encountered"); | ||
| } | ||
| } | ||
|
|
||
| CovMat(i, j) = sig + signor*t0[i]*t0[j]*1e-4; | ||
| } | ||
| } | ||
| return CovMat; | ||
| } | ||
|
|
||
| matrix<double> ComputeSqrtMat(matrix<double> const& inmatrix) | ||
| { | ||
| matrix<double> const& L = set->GetSqrtCov(); | ||
| const double* data = set->GetData(); | ||
| const int nDat = set->GetNData(); | ||
| const size_t n = inmatrix.size(0); | ||
| if (n <= 0) | ||
| throw LengthError("CholeskyDecomposition","attempting a decomposition of an empty matrix!"); | ||
|
|
||
| gsl_matrix_const_view inmatrix_view = gsl_matrix_const_view_array(inmatrix.data(), n, n); | ||
| const gsl_matrix *inmatrix_gsl = &(inmatrix_view.matrix); | ||
|
|
||
| matrix<double> sqrtmat(n,n); | ||
| gsl_matrix_view sqrtmat_view = gsl_matrix_view_array(sqrtmat.data(), n, n); | ||
| gsl_matrix *sqrtmat_gsl = &(sqrtmat_view.matrix); | ||
|
|
||
| // Copy and decompose inmatrix | ||
| const int copy = gsl_matrix_memcpy (sqrtmat_gsl, inmatrix_gsl); | ||
| if (copy != 0 ) throw RuntimeException("CholeskyDecomposition", "Error encountered in gsl matrix copy"); | ||
| const int decomp = gsl_linalg_cholesky_decomp(sqrtmat_gsl); | ||
| if (decomp != 0 ) throw RuntimeException("CholeskyDecomposition", "Error encountered in gsl decomposition"); | ||
|
|
||
| // Zero upper-diagonal part of matrix left by gsl (probably unneccesary) | ||
| for (int i = 0; i < n; i++) | ||
| for (int j = 0; j > i; j++) | ||
| sqrtmat(i, j) = 0; | ||
|
|
||
| return sqrtmat; | ||
| } | ||
|
|
||
| // TODO to sort this out, need to make data and theory vectors | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm actually starting to think that vectors everywhere is not such a good idea (though we could have it as a higher level interface), especially in view of wanting to use something like this:
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what that is, nor why we would want to use it. What would you use other than vectors? Stick to plain old pointers?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think both validphys and nnfit would benefit a lot from sharing memory between processes (like fktables). I think fktables are big enough that you will not see the performance difference in masking the train/valid split as opposed to actually slicing the tables. For vp the cost of initializing the same pdfs and fktables in several processes often offset the advantages of the parallel mode, and so it would be good if these things were loaded in shared memory once. That thing seems like a convenient way to do just that, but then you must control the allocator, which is a pain to do with the std containers. |
||
| void ComputeChi2_basic(int const nDat, int const nMem, | ||
| const double* data, matrix<double> const& L, | ||
| real *const& theory, real *chi2) | ||
| { | ||
| // Forward solve Lx = diffs | ||
| double x[nDat]; | ||
| for (int n = 0; n < nMem; n++) | ||
|
|
@@ -31,6 +104,17 @@ namespace NNPDF | |
| return; | ||
| } | ||
|
|
||
| template<class T> | ||
| void ComputeChi2(const T* set, int const& nMem, real *const& theory, real *chi2) | ||
| { | ||
| matrix<double> const& L = set->GetSqrtCov(); | ||
| const double* data = set->GetData(); | ||
| const int nDat = set->GetNData(); | ||
|
|
||
| ComputeChi2_basic(nDat, nMem, data, L, theory, chi2); | ||
| return; | ||
| } | ||
|
|
||
| template void ComputeChi2<Experiment>(const Experiment* set, int const& nMem, real *const& theory, real *chi2); | ||
| template void ComputeChi2<DataSet>(const DataSet* set, int const& nMem, real *const& theory, real *chi2); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we somehow avoid these checks being run ndata**2*nsys times? We have plots showing that it is a huge bottleneck for anything relying on these computations. Also it really looks that we can only look at the systematics of the first point, and do away with the
i==jcheck?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, that feels like a different battle to me, the battle for #25