diff --git a/CMakeLists.txt b/CMakeLists.txt
index 686bc87128..d7d689e2b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@ option(ENABLE_TESTS "Enable unit test" OFF)
 option(ENABLE_ASAN "Enable ASAN" OFF)
 option(ENABLE_DEAD_STRIP "Enable use of flag `-dead_strip-dylibs`" OFF)
 option(VP_DEV "validphys in developer mode" ON)
+option(N3_DEV "n3fit in developer mode" ON)
 set(PROFILE_PREFIX "" CACHE STRING
     "Where you store the 'data' folder. Default empty uses CMAKE_INSTALL_PREFIX/share/NNPDF.")
 if (PROFILE_PREFIX)
@@ -143,9 +144,19 @@ add_subdirectory(libnnpdf)
 # nnpdfcpp configuration
 add_subdirectory(nnpdfcpp)
 
+# evolven3fit
+add_subdirectory(n3fit/evolven3fit)
+
 # install validphys2
 if(VP_DEV)
   install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install -e ${PROJECT_SOURCE_DIR}/validphys2)")
 else(VP_DEV)
   install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install --no-deps --ignore-installed ${PROJECT_SOURCE_DIR}/validphys2)")
 endif(VP_DEV)
+
+# install n3fit
+if(N3_DEV)
+  install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install -e ${PROJECT_SOURCE_DIR}/n3fit)")
+else(N3_DEV)
+  install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install --no-deps --ignore-installed ${PROJECT_SOURCE_DIR}/n3fit)")
+endif(N3_DEV)
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index 3539ebbd4f..b4632560d0 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -2,6 +2,6 @@
 
 mkdir build
 cd build
-cmake .. -DCMAKE_INSTALL_PREFIX=${PREFIX} -DVP_DEV=OFF
+cmake .. -DCMAKE_INSTALL_PREFIX=${PREFIX} -DVP_DEV=OFF -DN3_DEV=OFF
 make -j${CPU_COUNT}
 make install
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 8dea477cab..2ae3c8becc 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -22,6 +22,10 @@ requirements:
     - python
     - numpy
   run:
+    - tensorflow
+    - keras
+    - hyperopt
+    - seaborn
     - lhapdf
     - sqlite
     - gsl
diff --git a/n3fit/evolven3fit/CMakeLists.txt b/n3fit/evolven3fit/CMakeLists.txt
new file mode 100644
index 0000000000..316cc9a757
--- /dev/null
+++ b/n3fit/evolven3fit/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Include files (should this information not be known at this point?)
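+# Note: the nnpdfcpp sources listed in add_executable below (nnpdfsettings, md5,
+# exportgrid, evolgrid) are compiled directly into evolven3fit, so the binary only
+# needs to link against libnnpdf and the external libraries (LHAPDF, APFEL, GSL, yaml).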
+include_directories(${PROJECT_SOURCE_DIR}/nnpdfcpp/src/common/inc) +include_directories(${PROJECT_SOURCE_DIR}/nnpdfcpp/src/nnfit/inc) +include_directories(${PROJECT_SOURCE_DIR}/n3fit/evolven3fit) +include_directories(${PROJECT_SOURCE_DIR}/libnnpdf/src/) +set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/binaries) + +configure_file( + "${PROJECT_SOURCE_DIR}/libnnpdf/src/NNPDF/common.h.in" + "${PROJECT_SOURCE_DIR}/libnnpdf/src/NNPDF/common.h" + ) + +# To whom it may concern +# I am 100% sure there is a better way of doing all of it +# but I care 0% about it atm +set(NNPDF_LDFLAGS "-L${PROJECT_BINARY_DIR}/libnnpdf/ -lnnpdf") + +# Add files to the make +add_executable(evolven3fit ${PROJECT_SOURCE_DIR}/n3fit/evolven3fit/evolven3fit.cc + ${PROJECT_SOURCE_DIR}/nnpdfcpp/src/common/src/nnpdfsettings.cc + ${PROJECT_SOURCE_DIR}/nnpdfcpp/src/common/src/md5.cc + ${PROJECT_SOURCE_DIR}/nnpdfcpp/src/common/src/exportgrid.cc + ${PROJECT_SOURCE_DIR}/nnpdfcpp/src/nnfit/src/evolgrid.cc ) + +# Set all flags +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${NNPDF_LDFLAGS} ${LHAPDF_LIBRARIES} ${GSL_LDFLAGS} ${APFEL_LIBRARIES} ${YAML_LDFLAGS}") + +# I am pretty sure this should not be a thing +string(REPLACE ";" " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") +target_link_libraries(evolven3fit nnpdf ${LHAPDF_LIBRARIES} ${YAML_LDFLAGS} ${APFEL_LIBRARIES} ${GSL_LDFLAGS}) + +install(TARGETS evolven3fit DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ +GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + diff --git a/n3fit/evolven3fit/evolven3fit.cc b/n3fit/evolven3fit/evolven3fit.cc new file mode 100644 index 0000000000..3b4503022a --- /dev/null +++ b/n3fit/evolven3fit/evolven3fit.cc @@ -0,0 +1,106 @@ +// $Id$ +// +// NNPDF++ 2016 +// +// Authors: Nathan Hartland, n.p.hartland@ed.ac.uk +// Stefano Carrazza, stefano.carrazza@mi.infn.it + +#include +#include +#include "common.h" +#include "nnpdfsettings.h" +#include "exportgrid.h" +#include "evolgrid.h" +#include +using namespace NNPDF; +using std::cout; +using std::endl; +using std::cerr; +using std::string; +using std::stoi; + +// Check if folder exists +bool CheckConsistency(string const& folder, string const& exportfile) +{ + bool status1 = false, status2 = false; + struct stat s, t; + if (stat(folder.c_str(), &s) == 0) + if (s.st_mode & S_IFDIR) + status1 = true; + if (stat(exportfile.c_str(), &t) == 0) + if (t.st_mode) + status2 = true; + if (status1 == true && status2 == true) return true; + else return false; +} + +/** + * This program: + * - takes as input a fit folder and a theoryID, + * - loads a vector of ExportGrid for all replicas generated by nnfit, + * - computes the DGLAP evolution operators for the theoryID + * - applies the evolution operators to the ExportGrid objects + * - outputs the evolved PDFs in the LHAPDF format to the fit folder. 
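+ *
+ * Example invocation (hypothetical fit-folder name and replica count):
+ *
+ *   evolven3fit results/NNPDF31_nlo_as_0118_n3fit 100
+ *
+ * This reads nnfit/replica_<n>/<pdfname>.exportgrid for n = 1..100 inside the
+ * given folder, writes the LHAPDF .info file to nnfit/, and stores each evolved
+ * replica as a .dat file next to its exportgrid.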
+ */ +int main(int argc, char **argv) +{ + // Read configuration filename from arguments + if (argc != 3) + { + cerr << Colour::FG_RED << "\nusage: evolven3fit [configuration folder] [max_replicas]\n" << Colour::FG_DEFAULT << endl; + exit(EXIT_FAILURE); + } + + const string fit_path = argv[1]; + const int maxreplica = stoi(argv[2]); + + // load settings from config folder + NNPDFSettings settings(fit_path); + const int theory_id = settings.Get("theory","theoryid").as(); + + // load theory from db + std::map theory_map; + NNPDF::IndexDB db(get_data_path() + "/theory.db", "theoryIndex"); + db.ExtractMap(theory_id, APFEL::kValues, theory_map); + + // load grids + vector initialscale_grids; + vector replicas; + for (int nrep = 1; nrep <= maxreplica; nrep++) + { + const string folder = fit_path + "/nnfit/replica_" + std::to_string(nrep); + const string path = folder + "/" + settings.GetPDFName() + ".exportgrid"; + bool status = CheckConsistency(folder, path); + if (status) + { + initialscale_grids.emplace_back(path); + replicas.push_back(nrep); + } + else + { + cout << "Skipping exportgrid (missing file): " << path << endl; + } + } + + if (initialscale_grids.size() == 0) + throw NNPDF::RuntimeException("main", "nrep = 0, check replica folder/files."); + + string infofile = fit_path + "/nnfit/" + settings.GetPDFName() + ".info"; + auto dglapg = EvolveGrid(initialscale_grids, theory_map); + dglapg.WriteInfoFile(infofile); + + const auto outstream = dglapg.WriteLHAFile(); + for (size_t i = 0; i < outstream.size(); i++) + { + stringstream replica_file; + replica_file << fit_path + << "/nnfit/replica_" + << replicas[i] + << "/" + << settings.GetPDFName() + << ".dat"; + write_to_file(replica_file.str(), outstream[i].str()); + } + + return 0; +} diff --git a/n3fit/evolven3fit/version.h b/n3fit/evolven3fit/version.h new file mode 100644 index 0000000000..f84864988a --- /dev/null +++ b/n3fit/evolven3fit/version.h @@ -0,0 +1 @@ +#define SVN_REV 3.1 diff --git a/n3fit/runcards/Basic_runcard.yml b/n3fit/runcards/Basic_runcard.yml new file mode 100644 index 0000000000..775d4d744f --- /dev/null +++ b/n3fit/runcards/Basic_runcard.yml @@ -0,0 +1,150 @@ +# +# Configuration file for NNPDF++ +# + +############################################################ +description: NNPDF3.1 NLO fitted charm global dataset + +############################################################ +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +experiments: + - experiment: ALL + datasets: + - { dataset: SLACP, frac: 0.5} + - { dataset: NMCPD, frac: 0.5 } + - { dataset: CMSJETS11, frac: 0.5, sys: 10 } + +############################################################ +datacuts: + t0pdfset : NNPDF31_nlo_as_0118 # PDF set to generate t0 covmat + q2min : 3.49 # Q2 minimum + w2min : 12.5 # W2 minimum + combocuts : NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev : 0 # jet pt cut for tevatron + jetptcut_lhc : 0 # jet pt cut for lhc + wptcut_lhc : 30.0 # Minimum pT for W pT diff distributions + jetycut_tev : 1e30 # jet rap. cut for tevatron + jetycut_lhc : 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut : 1e30 # jet cfact. 
cut + +############################################################ +theory: + theoryid: 53 # database id + +########################################################### +hyperscan: + stopping: + min_epochs: 5e2 + max_epochs: 40e2 + min_patience: 0.10 + max_patience: 0.40 + positivity: + min_multiplier: 1.00000001 + max_multiplier: 1.00000002 + min_initial: + max_initial: + optimizer: + names: 'ALL' # Use all implemented optimizers + min_lr: 0.0005 + max_lr: 0.5 + architecture: + initializers: 'ALL' + max_drop: 0.15 + n_layers: [2,3,4] + min_units: 5 + max_units: 50 + activations: ['sigmoid', 'tanh'] + + +############################################################ +fitting: + genrep : True # on = generate MC replicas, False = use real data + trvlseed: 1 + nnseed: 2 + mcseed: 3 + epochs: 900 + # CHANGE THE FOLLOWING OPTIONS + save: False + savefile: 'weights.hd5' + load: False + loadfile: 'weights.hd5' + plot: False + + parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [15, 10, 8] + activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] + initializer: 'glorot_normal' + learning_rate: 0.01 + optimizer: 'RMSprop' + epochs: 900 + pos_multiplier: 1.05 + pos_initial: # believe the pos_lambda below + stopping_patience: 0.30 # percentage of the number of epochs + layer_type: 'dense' + dropout: 0.0 + + # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) + # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) + # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) + # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) + fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. + basis: + # remeber to change the name of PDF accordingly with fitbasis + # pos: True for NN squared + # mutsize: mutation size + # mutprob: mutation probability + # smallx, largex: preprocessing ranges + - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.05,1.19], largex: [1.47,2.70] } + - { fl: g, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.94,1.25], largex: [0.11,5.87] } + - { fl: v, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.54,0.75], largex: [1.15,2.76] } + - { fl: v3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.21,0.57], largex: [1.35,3.08] } + - { fl: v8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.52,0.76], largex: [0.77,3.56] } + - { fl: t3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.37,1.52], largex: [1.74,3.39] } + - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.56,1.29], largex: [1.45,3.03] } + - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.12,1.19], largex: [1.83,6.70] } + +############################################################ +stopping: + stopmethod: LOOKBACK # Stopping method + lbdelta : 0 # Delta for look-back stopping + mingen : 0 # Minimum number of generations + window : 500 # Window for moving average + minchi2 : 3.5 # Minimum chi2 + minchi2exp: 6.0 # Minimum chi2 for experiments + nsmear : 200 # Smear for stopping + deltasm : 200 # Delta smear for stopping + rv : 2 # Ratio for validation stopping + rt : 0.5 # Ratio for training stopping + epsilon : 1e-6 # Gradient epsilon + +############################################################ +positivity: + posdatasets: + - { dataset: POSF2U, poslambda: 1e6 } # Positivity Lagrange Multiplier + +############################################################ +closuretest: + filterseed : 0 # Random seed to be used in filtering data partitions + fakedata : False # on = to use FAKEPDF 
to generate pseudo-data + fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data + errorsize : 1.0 # uncertainties rescaling + fakenoise : False # on = to add random fluctuations to pseudo-data + rancutprob : 1.0 # Fraction of data to be included in the fit + rancutmethod: 0 # Method to select rancutprob data fraction + rancuttrnval: False # 0(1) to output training(valiation) chi2 in report + printpdf4gen: False # To print info on PDFs during minimization + +############################################################ +lhagrid: + nx : 150 + xmin: 1e-9 + xmed: 0.1 + xmax: 1.0 + nq : 50 + qmax: 1e5 + +############################################################ diff --git a/n3fit/runcards/PN3_DIS_example.yml b/n3fit/runcards/PN3_DIS_example.yml new file mode 100644 index 0000000000..19efd8f33e --- /dev/null +++ b/n3fit/runcards/PN3_DIS_example.yml @@ -0,0 +1,179 @@ +# +# Configuration file for NNPDF++ +# + +############################################################ +description: NNPDF3.1 NNLO fitted charm global dataset + +############################################################ +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +experiments: +# Fixed target DIS + - experiment: NMC + datasets: + - { dataset: NMCPD, frac: 0.5 } + - { dataset: NMC, frac: 0.5 } + - experiment: SLAC + datasets: + - { dataset: SLACP, frac: 0.5} + - { dataset: SLACD, frac: 0.5} + - experiment: BCDMS + datasets: + - { dataset: BCDMSP, frac: 0.5} + - { dataset: BCDMSD, frac: 0.5} + - experiment: CHORUS + datasets: + - { dataset: CHORUSNU, frac: 0.5} + - { dataset: CHORUSNB, frac: 0.5} + - experiment: NTVDMN + datasets: + - { dataset: NTVNUDMN, frac: 0.5} + - { dataset: NTVNBDMN, frac: 0.5} +# EMC F2C data +# - experiment: EMCF2C +# datasets: +# - { dataset: EMCF2C, frac: 1.0} +# HERA data + - experiment: HERACOMB + datasets: + - { dataset: HERACOMBNCEM , frac: 0.5} + - { dataset: HERACOMBNCEP460, frac: 0.5} + - { dataset: HERACOMBNCEP575, frac: 0.5} + - { dataset: HERACOMBNCEP820, frac: 0.5} + - { dataset: HERACOMBNCEP920, frac: 0.5} + - { dataset: HERACOMBCCEM , frac: 0.5} + - { dataset: HERACOMBCCEP , frac: 0.5} +# Combined HERA charm production cross-sections + - experiment: HERAF2CHARM + datasets: + - { dataset: HERAF2CHARM, frac: 0.5} +# F2bottom data + - experiment: F2BOTTOM + datasets: + - { dataset: H1HERAF2B, frac: 1.0} + - { dataset: ZEUSHERAF2B, frac: 1.0} + + +############################################################ +datacuts: + t0pdfset : NNPDF31_nnlo_as_0118 # PDF set to generate t0 covmat + q2min : 3.49 # Q2 minimum + w2min : 12.5 # W2 minimum + combocuts : NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev : 0 # jet pt cut for tevatron + jetptcut_lhc : 0 # jet pt cut for lhc + wptcut_lhc : 30.0 # Minimum pT for W pT diff distributions + jetycut_tev : 1e30 # jet rap. cut for tevatron + jetycut_lhc : 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut : 1e30 # jet cfact. 
cut + +############################################################ +theory: + theoryid: 53 # database id + +############################################################ +fitting: + + trvlseed: 1 + nnseed: 2 + mcseed: 3 + epochs: 20000 + save: False + savefile: 'weights.hd5' + load: False + loadfile: 'weights.hd5' + plot: False + + seed : 9453862133528 # set the seed for the random generator + genrep : True # on = generate MC replicas, off = use real data + rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc + fitmethod: NGA # Minimization algorithm + nmutants : 80 # Number of mutants for replica + paramtype: NN + nnodes : [2,5,3,1] + + parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [35, 25, 8] + activation_per_layer: ['tanh', 'tanh', 'linear'] + initializer: 'glorot_normal' + learning_rate: 0.01 + optimizer: 'Adadelta' + epochs: 40000 + pos_multiplier: 1.09 + pos_initial: 10.0 # believe the pos_lambda above + stopping_patience: 0.30 # percentage of the number of epochs + layer_type: 'dense' + dropout: 0.0 + + # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) + # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) + # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) + # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) + fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. + basis: + # remeber to change the name of PDF accordingly with fitbasis + # pos: True for NN squared + # mutsize: mutation size + # mutprob: mutation probability + # smallx, largex: preprocessing ranges + - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.04,1.20], largex: [1.45,2.64] } + - { fl: g, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.82,1.31], largex: [0.20,6.17] } + - { fl: v, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.51,0.71], largex: [1.24,2.80] } + - { fl: v3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.23,0.63], largex: [1.02,3.14] } + - { fl: v8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.53,0.75], largex: [0.70,3.31] } + - { fl: t3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.45,1.41], largex: [1.78,3.21] } + - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } + - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } + +############################################################ +stopping: + stopmethod: LOOKBACK # Stopping method + lbdelta : 0 # Delta for look-back stopping + mingen : 0 # Minimum number of generations + window : 500 # Window for moving average + minchi2 : 3.5 # Minimum chi2 + minchi2exp: 6.0 # Minimum chi2 for experiments + nsmear : 200 # Smear for stopping + deltasm : 200 # Delta smear for stopping + rv : 2 # Ratio for validation stopping + rt : 0.5 # Ratio for training stopping + epsilon : 1e-6 # Gradient epsilon + +############################################################ +positivity: + posdatasets: + - { dataset: POSF2U, poslambda: 1e6 } # Positivity Lagrange Multiplier + - { dataset: POSF2DW, poslambda: 1e6 } + - { dataset: POSF2S, poslambda: 1e6 } + - { dataset: POSFLL, poslambda: 1e6 } + - { dataset: POSDYU, poslambda: 1e10 } + - { dataset: POSDYD, poslambda: 1e10 } + - { dataset: POSDYS, poslambda: 1e10 } + +############################################################ +closuretest: + filterseed : 0 # Random seed to be used in filtering data partitions + fakedata : False # on = to use FAKEPDF to generate pseudo-data + fakepdf 
: MSTW2008nlo68cl # Theory input for pseudo-data + errorsize : 1.0 # uncertainties rescaling + fakenoise : False # on = to add random fluctuations to pseudo-data + rancutprob : 1.0 # Fraction of data to be included in the fit + rancutmethod: 0 # Method to select rancutprob data fraction + rancuttrnval: False # 0(1) to output training(valiation) chi2 in report + printpdf4gen: False # To print info on PDFs during minimization + +############################################################ +lhagrid: + nx : 150 + xmin: 1e-9 + xmed: 0.1 + xmax: 1.0 + nq : 50 + qmax: 1e5 + +############################################################ +debug: False diff --git a/n3fit/runcards/PN3_Global_example.yml b/n3fit/runcards/PN3_Global_example.yml new file mode 100644 index 0000000000..5e66e1d96e --- /dev/null +++ b/n3fit/runcards/PN3_Global_example.yml @@ -0,0 +1,265 @@ +# +# Configuration file for NNPDF++ +# + +############################################################ +description: NNPDF3.1 NNLO fitted charm global dataset + +############################################################ +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +experiments: +# Fixed target DIS + - experiment: NMC + datasets: + - { dataset: NMCPD, frac: 0.5 } + - { dataset: NMC, frac: 0.5 } + - experiment: SLAC + datasets: + - { dataset: SLACP, frac: 0.5} + - { dataset: SLACD, frac: 0.5} + - experiment: BCDMS + datasets: + - { dataset: BCDMSP, frac: 0.5} + - { dataset: BCDMSD, frac: 0.5} + - experiment: CHORUS + datasets: + - { dataset: CHORUSNU, frac: 0.5} + - { dataset: CHORUSNB, frac: 0.5} + - experiment: NTVDMN + datasets: + - { dataset: NTVNUDMN, frac: 0.5} + - { dataset: NTVNBDMN, frac: 0.5} +# EMC F2C data +# - experiment: EMCF2C +# datasets: +# - { dataset: EMCF2C, frac: 1.0} +# HERA data + - experiment: HERACOMB + datasets: + - { dataset: HERACOMBNCEM , frac: 0.5} + - { dataset: HERACOMBNCEP460, frac: 0.5} + - { dataset: HERACOMBNCEP575, frac: 0.5} + - { dataset: HERACOMBNCEP820, frac: 0.5} + - { dataset: HERACOMBNCEP920, frac: 0.5} + - { dataset: HERACOMBCCEM , frac: 0.5} + - { dataset: HERACOMBCCEP , frac: 0.5} +# Combined HERA charm production cross-sections + - experiment: HERAF2CHARM + datasets: + - { dataset: HERAF2CHARM, frac: 0.5} +# F2bottom data + - experiment: F2BOTTOM + datasets: + - { dataset: H1HERAF2B, frac: 1.0} + - { dataset: ZEUSHERAF2B, frac: 1.0} + # Fixed target Drell-Yan + - experiment: DYE886 + datasets: + - { dataset: DYE886R, frac: 1.0 } + - { dataset: DYE886P, frac: 0.5, cfac: [QCD] } + - experiment: DYE605 + datasets: + - { dataset: DYE605, frac: 0.5, cfac: [QCD] } +# Tevatron jets and W,Z production + - experiment: CDF + datasets: + - { dataset: CDFZRAP, frac: 1.0, cfac: [QCD] } + - { dataset: CDFR2KT, frac: 0.5, sys: 10 } + - experiment: D0 + datasets: + - { dataset: D0ZRAP, frac: 1.0, cfac: [QCD] } + - { dataset: D0WEASY, frac: 1.0, cfac: [QCD] } + - { dataset: D0WMASY, frac: 1.0, cfac: [QCD] } + # ATLAS + - experiment: ATLAS + datasets: +# ATLAS EWK + - { dataset: ATLASWZRAP36PB, frac: 1.0, cfac: [QCD] } + - { dataset: ATLASZHIGHMASS49FB, frac: 1.0, cfac: [QCD] } + - { dataset: ATLASLOMASSDY11EXT, frac: 1.0, cfac: [QCD] } + - { dataset: ATLASWZRAP11, frac: 0.5, cfac: [QCD] } +# ATLAS jets + - { dataset: ATLASR04JETS36PB, frac: 0.5, sys: 10 } + - { dataset: ATLASR04JETS2P76TEV, frac: 0.5, sys: 10 } + - { dataset: ATLAS1JET11, frac: 0.5, sys: 10 } +# ATLAS Z pt +# - { dataset: ATLASZPT7TEV, frac: 0.5, cfac: [QCD,NRM], sys: 10 } + - { 
dataset: ATLASZPT8TEVMDIST, frac: 0.5, cfac: [QCD], sys: 10 } + - { dataset: ATLASZPT8TEVYDIST, frac: 0.5, cfac: [QCD], sys: 10 } +# ATLAS top + - { dataset: ATLASTTBARTOT, frac: 1.0, cfac: [QCD] } + - { dataset: ATLASTOPDIFF8TEVTRAPNORM, frac: 1.0, cfac: [QCD] } +# CMS + - experiment: CMS + datasets: +# CMS EWK + - { dataset: CMSWEASY840PB, frac: 1.0, cfac: [QCD] } + - { dataset: CMSWMASY47FB, frac: 1.0, cfac: [QCD] } +# - { dataset: CMSWCHARMTOT, frac: 1.0 } +# - { dataset: CMSWCHARMRAT, frac: 1.0 } + - { dataset: CMSDY2D11, frac: 0.5, cfac: [QCD] } + - { dataset: CMSWMU8TEV, frac: 1.0, cfac: [QCD] } +# CMS jets + - { dataset: CMSJETS11, frac: 0.5, sys: 10 } + - { dataset: CMS1JET276TEV, frac: 0.5, sys: 10 } +# CMS Z pt + - { dataset: CMSZDIFF12, frac: 1.0, cfac: [QCD,NRM], sys: 10 } +# CMS ttbar + - { dataset: CMSTTBARTOT, frac: 1.0, cfac: [QCD] } + - { dataset: CMSTOPDIFF8TEVTTRAPNORM, frac: 1.0, cfac: [QCD] } + # LHCb + - experiment: LHCb + datasets: + - { dataset: LHCBZ940PB, frac: 1.0, cfac: [QCD] } + - { dataset: LHCBZEE2FB, frac: 1.0, cfac: [QCD] } + - { dataset: LHCBWZMU7TEV, frac: 1.0, cfac: [NRM,QCD] } + - { dataset: LHCBWZMU8TEV, frac: 1.0, cfac: [NRM,QCD] } + + +############################################################ +datacuts: + t0pdfset : NNPDF31_nnlo_as_0118 # PDF set to generate t0 covmat + q2min : 3.49 # Q2 minimum + w2min : 12.5 # W2 minimum + combocuts : NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev : 0 # jet pt cut for tevatron + jetptcut_lhc : 0 # jet pt cut for lhc + wptcut_lhc : 30.0 # Minimum pT for W pT diff distributions + jetycut_tev : 1e30 # jet rap. cut for tevatron + jetycut_lhc : 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut : 1e30 # jet cfact. 
cut + +############################################################ +theory: + theoryid: 53 # database id + +hyperscan: + stopping: + min_epochs: 25e3 + max_epochs: 40e3 + min_patience: 0.10 + max_patience: 0.40 + positivity: + min_multiplier: 1.04 + max_multiplier: 1.1 + min_initial: 1.0 + max_initial: 50.0 + optimizer: + names: ['RMSprop', 'Adadelta'] + min_lr: 0.0005 + max_lr: 0.13 + architecture: + initializers: ['glorot_normal', 'glorot_uniform'] + max_drop: 0.05 + n_layers: [1,2] + min_units: 5 + max_units: 50 + activations: ['sigmoid', 'tanh'] + +############################################################ +fitting: + + trvlseed: 1 + nnseed: 2 + mcseed: 3 + epochs: 50000 + save: False + savefile: 'weights.hd5' + load: False + loadfile: 'weights.hd5' + plot: False + + seed : 9453862133528 # set the seed for the random generator + genrep : True # on = generate MC replicas, off = use real data + rngalgo : 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc + fitmethod: NGA # Minimization algorithm + ngen : 30000 # Maximum number of generations + nmutants : 80 # Number of mutants for replica + paramtype: NN + nnodes : [2,5,3,1] + + parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [50, 35, 25, 8] + activation_per_layer: ['tanh', 'sigmoid', 'sigmoid', 'linear'] + initializer: 'glorot_normal' + learning_rate: 0.002 + optimizer: 'Adadelta' + epochs: 50000 + pos_multiplier: 1.09 + pos_initial: 1.1 + stopping_patience: 0.3 # percentage of the number of epochs + layer_type: 'dense' + dropout: 0.006 + + # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) + # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) + # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) + # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) + fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. 
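+  # Note: of the keys in this 'fitting' section, the n3fit ModelTrainer consumes the
+  # 'parameters' dictionary above (nodes_per_layer, activation_per_layer, initializer,
+  # learning_rate, optimizer, epochs, pos_multiplier, pos_initial, stopping_patience,
+  # layer_type, dropout), while seed, rngalgo, fitmethod, ngen, nmutants, paramtype
+  # and nnodes are presumably legacy nnfit settings kept for compatibility.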
+ basis: + # remeber to change the name of PDF accordingly with fitbasis + # pos: True for NN squared + # mutsize: mutation size + # mutprob: mutation probability + # smallx, largex: preprocessing ranges + - { fl: sng, pos: False, mutsize: [15], mutprob: [0.05], smallx: [1.04,1.20], largex: [1.45,2.64] } + - { fl: g, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.82,1.31], largex: [0.20,6.17] } + - { fl: v, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.51,0.71], largex: [1.24,2.80] } + - { fl: v3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.23,0.63], largex: [1.02,3.14] } + - { fl: v8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.53,0.75], largex: [0.70,3.31] } + - { fl: t3, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.45,1.41], largex: [1.78,3.21] } + - { fl: t8, pos: False, mutsize: [15], mutprob: [0.05], smallx: [0.49,1.32], largex: [1.42,3.13] } + - { fl: cp, pos: False, mutsize: [15], mutprob: [0.05], smallx: [-0.07,1.13], largex: [1.73,7.37] } + +############################################################ +stopping: + stopmethod: LOOKBACK # Stopping method + lbdelta : 0 # Delta for look-back stopping + mingen : 0 # Minimum number of generations + window : 500 # Window for moving average + minchi2 : 3.5 # Minimum chi2 + minchi2exp: 6.0 # Minimum chi2 for experiments + nsmear : 200 # Smear for stopping + deltasm : 200 # Delta smear for stopping + rv : 2 # Ratio for validation stopping + rt : 0.5 # Ratio for training stopping + epsilon : 1e-6 # Gradient epsilon + +############################################################ +positivity: + posdatasets: + - { dataset: POSF2U, poslambda: 1e6 } # Positivity Lagrange Multiplier + - { dataset: POSF2DW, poslambda: 1e6 } + - { dataset: POSF2S, poslambda: 1e6 } + - { dataset: POSFLL, poslambda: 1e6 } + - { dataset: POSDYU, poslambda: 1e10 } + - { dataset: POSDYD, poslambda: 1e10 } + - { dataset: POSDYS, poslambda: 1e10 } + +############################################################ +closuretest: + filterseed : 0 # Random seed to be used in filtering data partitions + fakedata : False # on = to use FAKEPDF to generate pseudo-data + fakepdf : MSTW2008nlo68cl # Theory input for pseudo-data + errorsize : 1.0 # uncertainties rescaling + fakenoise : False # on = to add random fluctuations to pseudo-data + rancutprob : 1.0 # Fraction of data to be included in the fit + rancutmethod: 0 # Method to select rancutprob data fraction + rancuttrnval: False # 0(1) to output training(valiation) chi2 in report + printpdf4gen: False # To print info on PDFs during minimization + +############################################################ +lhagrid: + nx : 150 + xmin: 1e-9 + xmed: 0.1 + xmax: 1.0 + nq : 50 + qmax: 1e5 + +############################################################ +debug: False diff --git a/n3fit/setup.py b/n3fit/setup.py new file mode 100644 index 0000000000..689cfaaa1b --- /dev/null +++ b/n3fit/setup.py @@ -0,0 +1,13 @@ +from setuptools import setup, find_packages + +setup( + name="n3fit", + version="0.9", + package_dir = {'':'src'}, + packages=find_packages('src'), + + entry_points = {'console_scripts': + ['n3fit = n3fit.n3fit:main', + 'n3Hyperplot = n3fit.hyper_optimization.plotting:main'], + }, +) diff --git a/n3fit/src/n3fit/ModelTrainer.py b/n3fit/src/n3fit/ModelTrainer.py new file mode 100644 index 0000000000..e779124f09 --- /dev/null +++ b/n3fit/src/n3fit/ModelTrainer.py @@ -0,0 +1,536 @@ +""" + The ModelTrainer class is the true driver around the n3fit code + + This class is initialized 
with all information about the NN, inputs and outputs. + The construction of the NN and the fitting is performed at the same time when the + hyperparametizable method of the function is called. + + This allows to use hyperscanning libraries, that need to change the parameters of the network + between iterations while at the same time keeping the amount of redundant calls to a minimum +""" +import n3fit.model_gen as model_gen +import n3fit.msr as msr_constraints +import n3fit.Statistics as Statistics +from n3fit.backends import MetaModel, clear_backend_state + +import logging + +log = logging.getLogger(__name__) + +# If an experiment matches this name, it will not be included int he training +TESTNAME = "TEST" +# Weights for the hyperopt cost function +TEST_MULTIPLIER = 0.5 +VALIDATION_MULTIPLIER = 0.5 + + +class ModelTrainer: + """ + ModelTrainer Class: + + This class provides a wrapper around the fitting code and the generation of the Neural Network + + When the "hyperparametizable"* function is called with a dictionary of parameters, + it generates a NN and subsequentially performs a fit. + + The motivation behind this class is minimising the amount of redundant calls of each hyperopt + run, in particular this allows to completely reset the NN at the beginning of each iteration + reusing some of the previous work. + + *called in this way because it accept a dictionary of hyper-parameters which defines the Neural Network + """ + + def __init__(self, exp_info, pos_info, flavinfo, nnseed, pass_status="ok", failed_status="fail", debug=False): + """ + # Arguments: + - `exp_info`: list of dictionaries containing experiments + - `pos_info`: list of dictionaries containing positivity sets + - `flavinfo`: the object returned by fitting['basis'] + - `nnseed`: the seed used to initialise the Neural Network, will be passed to model_gen + - `pass_status`: flag to signal a good run + - `failed_status`: flag to signal a bad run + - `pass_status`: flag to signal a good run + - `failed_status`: flag to signal a bad run + - `debug`: flag to activate some debug options + """ + + # Save all input information + self.exp_info = exp_info + self.pos_info = pos_info + self.flavinfo = flavinfo + self.NNseed = nnseed + self.pass_status = pass_status + self.failed_status = failed_status + self.debug = debug + + # Initialise internal variables which define behaviour + self.print_summary = True + self.mode_hyperopt = False + self.model_file = None + self.impose_sumrule = True + + # Initialize the pdf layer + self.layer_pdf = None + + # Initialize the dictionaries which contain all fitting information + self.input_list = [] + self.ndata_dict = {} + self.training = {"output": [], "expdata": [], "losses": [], "ndata": 0, "model": None, "posdatasets": []} + self.validation = {"output": [], "expdata": [], "losses": [], "ndata": 0, "model": None} + self.experimental = {"output": [], "expdata": [], "losses": [], "ndata": 0, "model": None} + + self.test_dict = None + self._fill_the_dictionaries() + + if self.validation["ndata"] == 0: + self.no_validation = True + new_dict = {} + for key, item in self.ndata_dict.items(): + if key.endswith("_vl"): + continue + new_dict[key + "_vl"] = item + self.ndata_dict.update(new_dict) + self.validation["expdata"] = self.training["expdata"] + else: + self.no_validation = False + + @property + def model_file(self): + """ If a model_file is set the training model will try to get the weights form here """ + return self.__model_file + + @model_file.setter + def model_file(self, model_file): + 
self.__model_file = model_file + + def set_hyperopt(self, on, keys=None): + """ Set hyperopt options on and off (mostly suppresses some printing) """ + if keys is None: + keys = [] + self.hyperkeys = keys + if on: + self.print_summary = False + self.mode_hyperopt = True + else: + self.print_summary = True + self.mode_hyperopt = False + + ########################################################################### + # # Internal functions # + # Never to be called from the dark and cold outside world # + ########################################################################### + def _fill_the_dictionaries(self): + """ + This function fills the following dictionaries with fixed information, + -`training`: data for the fit + -`validation`: data which for the stopping + -`experimental`: 'true' data, only used for reporting purposes + -`ndata_dict`: a dictionary containing 'name of experiment' : Number Of Points + + Note: the dictionaries will be used at different stages filled with information that will be cleaned at every + hyperopt iteration. If not running hyperopt, the fixed-nonfixed thing makes no difference. + + The fixed information (i.e., non dependant on the parameters of the fit) contained in those dictionaries: + - `expdata`: experimental data + - `name`: names of the experiment + - `ndata`: number of experimental points + """ + for exp_dict in self.exp_info: + if exp_dict["name"] == TESTNAME: + self.test_dict = { + "output": [], + "expdata": exp_dict["expdata_true"], + "losses": [], + "ndata": exp_dict["ndata"] + exp_dict["ndata_vl"], + "model": None, + } + continue + self.training["expdata"].append(exp_dict["expdata"]) + self.validation["expdata"].append(exp_dict["expdata_vl"]) + self.experimental["expdata"].append(exp_dict["expdata_true"]) + + nd_tr = exp_dict["ndata"] + nd_vl = exp_dict["ndata_vl"] + + self.ndata_dict[exp_dict["name"]] = nd_tr + self.ndata_dict[exp_dict["name"] + "_vl"] = nd_vl + + self.training["ndata"] += nd_tr + self.validation["ndata"] += nd_vl + self.experimental["ndata"] += nd_tr + nd_vl + + for pos_dict in self.pos_info: + self.training["expdata"].append(pos_dict["expdata"]) + self.ndata_dict[pos_dict["name"]] = pos_dict["ndata"] + self.training["posdatasets"].append(pos_dict["name"]) + + self.ndata_dict["total_tr"] = self.training["ndata"] + self.ndata_dict["total_vl"] = self.validation["ndata"] + self.ndata_dict["total"] = self.experimental["ndata"] + + def _model_generation(self): + """ + Fills the three dictionaries (`training`, `validation`, `experimental`) with the `model` entry + + *Note*: before entering this function the dictionaries contain a list of inputs + and a list of outputs, but they are not connected. 
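+        (The 'output' entries stored in those dictionaries are layer callables rather
+        than tensors; they only become tensors once _pdf_injection applies them to the
+        shared layer_pdf.)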
+ This function connects inputs with outputs by injecting the PDF + + Compiles the validation and experimental models with fakes optimizers and learning rate + as they are never trained, but this is needed by some backends in order to run evaluate on them + """ + log.info("Generating the Model") + + input_list = self.input_list + # Create the training, validation and true (data w/o replica) models: + tr_model = MetaModel(input_list, self._pdf_injection(self.training["output"])) + vl_model = MetaModel(input_list, self._pdf_injection(self.validation["output"])) + true_model = MetaModel(input_list, self._pdf_injection(self.experimental["output"])) + + if self.model_file: + # If a model file is given, load the weights from there + # note: even though the load corresponds to the training model only, the layer_pdf is shared + # and so it should affect all models + tr_model.load_weights(self.model_file) + + if self.print_summary: + tr_model.summary() + + fake_opt = "RMSprop" + fake_lr = 0.01 + # Compile true and validation with fake lr/optimizer + vl_model.compile(optimizer_name=fake_opt, learning_rate=fake_lr, loss=self.validation["losses"]) + true_model.compile(optimizer_name=fake_opt, learning_rate=fake_lr, loss=self.experimental["losses"]) + + if self.test_dict: + # If a test_dict was given, create and compile also the test model + test_model = MetaModel(input_list, self._pdf_injection(self.test_dict["output"])) + test_model.compile(optimizer_name=fake_opt, learning_rate=fake_lr, loss=self.test_dict["losses"]) + self.test_dict["model"] = test_model + + self.training["model"] = tr_model + self.validation["model"] = vl_model + self.experimental["model"] = true_model + + def _reset_observables(self): + """ + Resets the 'output' and 'losses' entries of all 3 dictionaries, (`training`, `validation`, `experimental`) + as well as the input_list + this is necessary as these can either depend on the parametrization of the NN + or be obliterated when/if the backend state is reset + """ + self.input_list = [] + for key in ["output", "losses"]: + self.training[key] = [] + self.validation[key] = [] + self.experimental[key] = [] + if self.test_dict: + self.test_dict[key] = [] + + def _pdf_injection(self, olist): + """ + Takes as input a list of output layers and returns a corresponding list + where all output layers call the pdf layer at self.pdf_layer + """ + return [o(self.layer_pdf) for o in olist] + + ############################################################################ + # # Parametizable functions # + # # + # The functions defined in this block accept a 'params' dictionary which # + # defines the fit and the behaviours of the Neural Networks # + # # + # These are all called by the function hyperparamizable below # + # i.e., the most important function is hyperparametizable, which is a # + # wrapper around all of these # + ############################################################################ + def _generate_observables(self, pos_multiplier, pos_initial): + """ + This functions fills the 3 dictionaries (training, validation, experimental) + with the output layers and the loss functions + It also fill the list of input tensors (input_list) + + Parameters accepted: + - `pos_multiplier`: the multiplier to be applied to the positivity each 100 epochs + - `pos_initial`: the initial value for the positivity + """ + + # First reset the dictionaries + self._reset_observables() + log.info("Generating layers") + + # Now we need to loop over all dictionaries (First exp_info, then pos_info) + for 
exp_dict in self.exp_info: + if not self.mode_hyperopt: + log.info("Generating layers for experiment {0}".format(exp_dict["name"])) + + exp_layer = model_gen.observable_generator(exp_dict) + + # Save the input(s) corresponding to this experiment + self.input_list += exp_layer["inputs"] + + if exp_dict["name"] == TESTNAME and self.test_dict: + # If this is the test set, fill the dictionary and stop here + self.test_dict["output"].append(exp_layer["output"]) + self.test_dict["losses"].append(exp_layer["loss"]) + continue + + # Now save the observable layer, the losses and the experimental data + self.training["output"].append(exp_layer["output_tr"]) + self.validation["output"].append(exp_layer["output_vl"]) + self.experimental["output"].append(exp_layer["output"]) + + self.training["losses"].append(exp_layer["loss_tr"]) + self.validation["losses"].append(exp_layer["loss_vl"]) + self.experimental["losses"].append(exp_layer["loss"]) + + # Finally generate the positivity penalty + for pos_dict in self.pos_info: + if not self.mode_hyperopt: + log.info("Generating positivity penalty for {0}".format(pos_dict["name"])) + pos_layer = model_gen.observable_generator( + pos_dict, positivity_initial=pos_initial, positivity_multiplier=pos_multiplier + ) + # The input list is still common + self.input_list += pos_layer["inputs"] + + # The positivity all falls to the training + self.training["output"].append(pos_layer["output_tr"]) + self.training["losses"].append(pos_layer["loss_tr"]) + # Save the positivity multiplier into the training dictionary as it will be used during training + self.training["pos_multiplier"] = pos_multiplier + + def _generate_pdf(self, nodes_per_layer, activation_per_layer, initializer, layer_type, dropout): + """ + Defines the internal variable layer_pdf + this layer takes any input (x) and returns the pdf value for that x + + if the sumrule is being imposed, it also updates input_list with the + integrator_input tensor used to calculate the sumrule + + # Returns: (layers, integrator_input) + `layers`: a list of layers + `integrator_input`: input used to compute the sumrule + both are being used at the moment for reporting purposes at the end of the fit + + Parameters accepted: + - `nodes_per_layer` + - `activation_per_layer` + - `initializer` + - `layer_type` + - `dropout` + see model_gen.pdfNN_layer_generator for more information + """ + log.info("Generating PDF layer") + + # Set the parameters of the NN + + # Generate the NN layers + layer_pdf, layers = model_gen.pdfNN_layer_generator( + nodes=nodes_per_layer, + activations=activation_per_layer, + layer_type=layer_type, + flav_info=self.flavinfo, + seed=self.NNseed, + initializer_name=initializer, + dropout=dropout, + ) + + integrator_input = None + if self.impose_sumrule: + # Impose the sumrule + # Inyect here momentum sum rule, effecively modifying layer_pdf + layer_pdf, integrator_input = msr_constraints.msr_impose(layers["fitbasis"], layer_pdf) + self.input_list.append(integrator_input) + + self.layer_pdf = layer_pdf + + return layers, integrator_input + + def _model_compilation(self, learning_rate, optimizer): + """ + Compiles the model with the data given in params + + Parameters accepted: + - `learning_rate` + - `optimizer` + Optimizers accepted are backend-dependent + """ + training_model = self.training["model"] + loss_list = self.training["losses"] + + training_model.compile(optimizer_name=optimizer, learning_rate=learning_rate, loss=loss_list) + + def _train_and_fit(self, validation_object, epochs): + """ + Trains 
the NN for the number of epochs given using + validation_object as the stopping criteria + + Every 100 epochs the positivitiy will be updated with + self.training['pos_multiplier'] + """ + training_model = self.training["model"] + exp_list = self.training["expdata"] + pos_multiplier = self.training["pos_multiplier"] + # Train the model for the number of epochs given + for epoch in range(epochs): + out = training_model.fit(x=None, y=exp_list, epochs=1, verbose=False, shuffle=False) + passes = validation_object.monitor_chi2(out, epoch) + + if validation_object.stop_here(): + break + + if (epoch + 1) % 100 == 0: + for pos_ds in self.training["posdatasets"]: + name = pos_ds + curr_w = training_model.get_layer(name).get_weights() + new_w = [curr_w[0] * pos_multiplier] + training_model.get_layer(name).set_weights(new_w) + + # Report a "good" training only if there was no NaNs and there was at least a point which passed positivity + if passes and validation_object.positivity: + return self.pass_status + else: + return self.failed_status + + def _hyperopt_override(self, params): + """ Unrolls complicated hyperopt structures into very simple dictionaries""" + # I love the smell of napalm in the morning + for hyperkey in self.hyperkeys: + item = params[hyperkey] + if isinstance(item, dict): + for key, value in item.items(): + params[key] = value + + def hyperparametizable(self, params): + """ + Wrapper around all the functions defining the fit. + + After the ModelTrainer class has been instantiated only a call to this function, with a `params` dictionary + is necessary to generate the whole PDF model and perform a fit. + + This is a necessary step for hyperopt to work + + Parameters used only here: + - `epochs`: maximum number of iterations for the fit to run + - `stopping_patience`: patience of the stopper after finding a new minimum + All other parameters are passed to the corresponding functions + """ + + # Reset the internal state of the backend + print("") + if not self.debug: + clear_backend_state() + + # When doing hyperopt some entries in the params dictionary can bring with them overriding arguments + if self.mode_hyperopt: + log.info("Performing hyperparameter scan") + for key in self.hyperkeys: + log.info(" > > Testing {0} = {1}".format(key, params[key])) + self._hyperopt_override(params) + + # Fill the 3 dictionaries (training, validation, experimental) with the layers and losses + self._generate_observables(params['pos_multiplier'], params['pos_initial']) + + # Generate the pdf layer + layers, integrator_input = self._generate_pdf( + params["nodes_per_layer"], + params["activation_per_layer"], + params["initializer"], + params["layer_type"], + params["dropout"] + ) + + # Model generation + self._model_generation() + + # Generate the validation_object + # this object golds statistical information about the fit and it can be used to perform stopping + epochs = int(params["epochs"]) + stopping_patience = params["stopping_patience"] + stopping_epochs = epochs * stopping_patience + + # If the tr/vl splitting is == 1, there will be no points in the validation so we use the training as val + if self.no_validation: + validation_object = Statistics.Stat_Info( + self.training["model"], + self.training["expdata"], + self.ndata_dict, + total_epochs=epochs, + stopping_epochs=stopping_epochs, + ) + else: + validation_object = Statistics.Stat_Info( + self.validation["model"], + self.validation["expdata"], + self.ndata_dict, + total_epochs=epochs, + stopping_epochs=stopping_epochs, + ) + + # Compile 
the training['model'] with the given parameters + self._model_compilation(params['learning_rate'], params['optimizer']) + + ### Training loop + passed = self._train_and_fit(validation_object, epochs) + + # After training has completed, reload the best_model found during training + validation_object.reload_model() + + # Compute validation loss + validation_loss = validation_object.loss() + + # Compute experimental loss + exp_final = self.experimental["model"].evaluate(x=None, y=self.experimental["expdata"], batch_size=1) + try: + experimental_loss = exp_final[0] / self.experimental["ndata"] + except IndexError: + experimental_loss = exp_final / self.experimental["ndata"] + + # Compute the testing loss if it was given + if self.test_dict: + # Generate the 'true' chi2 with the experimental model but only for models that were stopped + target_model = self.test_dict["model"] + target_data = self.test_dict["expdata"] + out_final = target_model.evaluate(x=None, y=target_data, verbose=False, batch_size=1) + try: + testing_loss = out_final[0] / self.test_dict["ndata"] + except IndexError: + testing_loss = out_final / self.test_dict["ndata"] + else: + testing_loss = 0.0 + + if self.mode_hyperopt: + final_loss = VALIDATION_MULTIPLIER * validation_loss + final_loss += TEST_MULTIPLIER * testing_loss + final_loss /= TEST_MULTIPLIER + VALIDATION_MULTIPLIER + arc_lengths = msr_constraints.compute_arclength(layers["fitbasis"], verbose=False) + else: + final_loss = experimental_loss + arc_lengths = None + + dict_out = { + "loss": final_loss, + "status": passed, + "arc_lengths": arc_lengths, + "training_loss": validation_object.tr_loss(), + "validation_loss": validation_loss, + "experimental_loss": experimental_loss, + "testing_loss": testing_loss, + } + + if self.mode_hyperopt: + # If we are using hyperopt we don't need to output any other information + return dict_out + + # Add to the output dictionary things that are needed by fit.py + # to generate the output pdf, check the arc-length, gather stats, etc + # some of them are already attributes of the class so they are redundant here + # but I think it's good to present them explicitly + dict_out["layer_pdf"] = self.layer_pdf + dict_out["layers"] = layers + dict_out["integrator_input"] = integrator_input + dict_out["validation_object"] = validation_object + dict_out["training"] = self.training + + return dict_out diff --git a/n3fit/src/n3fit/Statistics.py b/n3fit/src/n3fit/Statistics.py new file mode 100644 index 0000000000..3c077154d8 --- /dev/null +++ b/n3fit/src/n3fit/Statistics.py @@ -0,0 +1,310 @@ +import numpy as np +from matplotlib import pyplot as pl + + +class Stat_Info: + """ + To instantiate the Stats_Info object we need: + - `vl_model`: The validation model, which has the same PDF layer as the training model + - `vl_data`: The validation exp data to compare to + - `ndata_dict`: A dictionary with the number of points of each experiment in the format: + `exp`: npoints in the training model for experiment exp + `exp_vl` : npoints in the training model for experiment exp + - `chi2_threshold`: at which training chi do we start checking for a stopping point? + - `save_all_each`: in which interval should we save all chi2 per experiment? 
+ - `stopping_epochs`: after a growth in chi2 occurs, we activate the stopping monitor + if the chi2 doesnt go below the minimum after stopping_epochs, we stop + - `dont_stop`: don't care about early stopping + + # TODO: this function has outgrown its initial scope and some refactoring is needed + """ + + def __init__( + self, + vl_model, + vl_data, + ndata_dict, + threshold_positivity=1e-6, + chi2_threshold=3.0, + save_all_each=100, + total_epochs=0, + stopping_epochs=7000, + dont_stop=False, + ): + + + self.vl_model = vl_model + self.vl_data = vl_data + + self.stop_now = False + self.nan_found = False + self.stopping_epochs = stopping_epochs + self.stopping_degree = 0 + self.total_epochs = total_epochs + self.epoch_of_the_stop = total_epochs + self.dont_stop = dont_stop + self.total_epochs = total_epochs + self.not_reloaded = True + self.save_all_each = save_all_each + + # Threshold for positivity + self.threshold_positivity = threshold_positivity + + # Best model stats + self.threshold = chi2_threshold + + # Number of points per experiment + self.ndata_tr = max(ndata_dict["total_tr"], 1) + self.ndata_vl = max(ndata_dict["total_vl"], 1) + self.ndata_dict = ndata_dict + self.exp_names = [] + for i in ndata_dict.keys(): + if i.endswith("_vl") or i.startswith(("POS", "total", "ALL")): + continue + self.exp_names.append(i) + + self._clean_best_model() + self._clean_full_stats() + + def _clean_best_model(self): + self.best_chi2 = 1e9 + self.e_best_chi2 = 0 + self.w_best_chi2 = None + self.positivity = False + self.saved_history = None + + def _clean_full_stats(self): + # Complete stats + self.epochs = [] + self.tr_chi2 = [] + self.vl_chi2 = [] + self.all_tr_chi2 = {} + self.all_vl_chi2 = {} + self.all_epochs = [] + for exp in self.exp_names: + self.all_tr_chi2[exp] = [] + self.all_vl_chi2[exp] = [] + self.all_tr_chi2["Total"] = [] + self.all_vl_chi2["Total"] = [] + + def reset(self, stopping_epochs=None): + """ + Sometimes it might be necessary to reset the statistics + This only require resetting some of the variables and not all + """ + self._clean_full_stats() + self._clean_best_model() + self.stop_now = False + self.epoch_of_the_stop = self.total_epochs + self.not_reloaded = True + self.stopping_degree = 0 + if stopping_epochs: + self.stopping_epochs = stopping_epochs + + def _compute_validation(self): + """ Evaluates the validation model + vl_list: a list of chi2, where the first element is the sum of all others + If there is only one experiment/dataset, vl_list is a float instead + + Returns the list already normalised to the number of points + """ + vl_list = self.vl_model.evaluate(x=None, y=self.vl_data, verbose=False, batch_size=1) + vl_dict = {} + try: + total = vl_list[0] / self.ndata_vl + vl_list = vl_list[1:] + except IndexError: + total = vl_list / self.ndata_vl + vl_list = [] + + # If validation dont include all experiments, there is something wrong in the modle + # and since evaluate returns a list with no names, + # it is impossible to know which experiment corresponds to which chi2 + if len(vl_list) != len(self.exp_names): + vl_list = [0.0 for i in self.exp_names] + + # This loop relies on the information that comes through the input dictionaries to be accurate + # It also relies in the ordered dictionaries introduced at some point in python 3.6/7 + for chi2, exp in zip(vl_list, self.exp_names): + vl_dict[exp] = chi2 / self.ndata_dict[exp] + + return total, vl_dict + + def _parse_training(self, training_info): + """ Receives an object containing the training chi2 and parses 
it + in the form of a { 'experiment' : chi2 } dictionary, where chi2 + is already normalised to the number of points per experiment """ + try: + hobj = training_info.history + except AttributeError: # So it works whether we pass the out our the out.history + hobj = training_info + + # Since the history object can have more than one loss (if epochs != 1) but it is always a list + # Save them as the mean + + total = np.mean(hobj["loss"]) / self.ndata_tr + tr_chi2 = {} + for exp in self.exp_names: + chi2 = np.mean(hobj[exp + "_loss"]) / self.ndata_dict[exp] + tr_chi2[exp] = chi2 + + return total, tr_chi2 + + def monitor_chi2(self, training_info, epoch): + """ Function to be called at the end of every epoch. + Stores the total chi2 of the training set as well as the + total chi2 of the validation set. + If the training chi2 is below a certain threshold, + stores the state of the model which gave the minimum chi2 + as well as the epoch in which occurred + If the epoch is a multiple of save_all_each then we also save the per-exp chi2 + + Returns True if the run seems ok and False if a NaN is found + """ + vl_chi2, all_vl = self._compute_validation() + tr_chi2, all_tr = self._parse_training(training_info) + + if np.isnan(tr_chi2): + print(" > NaN found, stopping activated") + self.stop_now = True + self.nan_found = True + self.epoch_of_the_stop = epoch + self.reload_model() + if not self.w_best_chi2: + self.best_chi2 = 1e10 + return False + + if vl_chi2 == 0.0: # If there is no masking, training and validation are the same thing + vl_chi2 = tr_chi2 + all_vl = all_tr + + self.epochs.append(epoch) + self.tr_chi2.append(tr_chi2) + self.vl_chi2.append(vl_chi2) + + if tr_chi2 < self.threshold: + self.stopping_degree += 1 + + if vl_chi2 < self.best_chi2 and self.check_positivity(training_info): + self.positivity = True + self.best_chi2 = vl_chi2 + self.e_best_chi2 = epoch + self.w_best_chi2 = self.vl_model.get_weights() + self.stopping_degree = 0 + self.saved_history = training_info + elif not self.w_best_chi2: + self.stopping_degree = 0 + + if (epoch + 1) % 100 == 0: + self.all_epochs.append(epoch + 1) + for exp in self.exp_names: + self.all_tr_chi2[exp].append(all_tr[exp]) + self.all_vl_chi2[exp].append(all_vl[exp]) + self.all_tr_chi2["Total"].append(tr_chi2) + self.all_vl_chi2["Total"].append(vl_chi2) + self.print_current_stats() + + if self.stopping_degree > self.stopping_epochs: + self.stop_now = True + self.epoch_of_the_stop = epoch + self.reload_model(force=True) + + return True + + def reload_model(self, force=False): + if (self.not_reloaded or force) and self.w_best_chi2: + self.not_reloaded = False + self.vl_model.set_weights(self.w_best_chi2) + + def good_stop(self): + """ Returns true if and only if the stopping criteria was actually fulfilled + and we didnt stop by error/nan """ + return self.positivity and not self.nan_found + + def stop_here(self): + if self.dont_stop: + return False + else: + return self.stop_now + + def loss(self): + return self.best_chi2 + + def tr_loss(self): + i = self.epochs.index(self.e_best_chi2) + return self.tr_chi2[i] + + def print_current_stats(self): + """ + Prints the last validation and training loss saved + """ + epoch = self.all_epochs[-1] + epochs = self.total_epochs + total = self.all_tr_chi2["Total"][-1] + total_str = "\nAt epoch {0}/{1}, total loss: {2}".format(epoch, epochs, total) + + partials = "" + for exp in self.exp_names: + partials += "{0}: {1:.3f} ".format(exp, self.all_tr_chi2[exp][-1]) + + print(total_str) + if partials: + print(" > 
Partial losses: " + partials) + + total_vl = self.all_vl_chi2["Total"][-1] + print(" > Validation loss at this point: {0}".format(total_vl)) + + def plot(self): + print("Plotting validation loss") + pl.plot(self.epochs, self.tr_chi2, label="Training") + pl.plot(self.epochs, self.vl_chi2, label="Validation") + pl.legend() + pl.show() + + def check_positivity(self, training_info): + """ Checks whether positivity passes """ + chi2 = 0.0 + try: + hobj = training_info.history + except AttributeError: # So it works whether we pass the out our the out.history + hobj = training_info + + for key, item in hobj.items(): + if "POS" in key: + if item[-1] > 0.0: + chi2 += item[-1] + if chi2 > self.threshold_positivity: + return False + else: + return True + + # Str functions + def positivity_pass(self): + if self.positivity: + return "POS_PASS" + else: + return "POS_VETO" + + def preproc_str(self): + preprocessing = self.vl_model.get_layer("pdf_prepro") + alphabeta = preprocessing.get_weights() + file_list = ["Alpha Beta"] + for i in range(0, len(alphabeta), 2): + file_list.append("\n{0} {1}".format(alphabeta[i][0], alphabeta[i + 1][0])) + file_list.append("\n") + return file_list + + def chi2exps_str(self): + file_list = [] + nexp = len(self.exp_names) + for i, epoch in enumerate(self.all_epochs): + strout = "\nEpoch: {0} nexp {1}".format(epoch, nexp) + for exp in self.exp_names: + tr = self.all_tr_chi2[exp][i] + vl = self.all_vl_chi2[exp][i] + strout += "\n{0}: {1} {2}".format(exp, tr, vl) + tr = self.all_tr_chi2["Total"][i] + vl = self.all_vl_chi2["Total"][i] + strout += "\nTotal: training = {0} validation = {1}\n".format(tr, vl) + file_list.append(strout) + return file_list diff --git a/n3fit/src/n3fit/__init__.py b/n3fit/src/n3fit/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/n3fit/src/n3fit/backends/__init__.py b/n3fit/src/n3fit/backends/__init__.py new file mode 100644 index 0000000000..15def8a2bd --- /dev/null +++ b/n3fit/src/n3fit/backends/__init__.py @@ -0,0 +1,9 @@ +from n3fit.backends.keras_backend.internal_state import set_initial_state, clear_backend_state +from n3fit.backends.keras_backend.MetaLayer import MetaLayer +from n3fit.backends.keras_backend.MetaModel import MetaModel +from n3fit.backends.keras_backend.base_layers import concatenate, Lambda, base_layer_selector +from n3fit.backends.keras_backend import losses +from n3fit.backends.keras_backend import operations +from n3fit.backends.keras_backend import constraints + +print("Using Keras backend") diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py new file mode 100644 index 0000000000..af224ce4e3 --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py @@ -0,0 +1,148 @@ +""" + The class MetaLayer is an extension of the backend Layer class + with a number of methods and helpers to facilitate writing new custom layers + in such a way that the new custom layer don't need to rely in anything backend-dependent + + In other words, if you want to implement a new layer and need functions not included here + it is better to add a new method which is just a call to the relevant backend-dependent function + For instance: np_to_tensor is just a call to K.constant +""" + + +from keras import backend as K +from keras.engine.topology import Layer +from keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform + + +class MetaLayer(Layer): + """ + This metalayer function must contain all backend-dependent functions 
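+    (in practice these are thin wrappers around keras.backend / tf calls such as
+    K.constant, tensordot and boolean_mask, see the methods below)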
+ + In order to write a custom Keras layer you usually need to override: + - __init__ + - build + - call + - output_shape + """ + + # Define in this dictionary new initializers as well as the arguments they accept (with default values if needed be) + initializers = { + "random_uniform": (RandomUniform, {"minval": -0.5, "maxval": 0.5}), + "glorot_uniform": (glorot_uniform, {}), + "glorot_normal": (glorot_normal, {}), + } + + # Building function + def builder_helper(self, name, kernel_shape, initializer, trainable=True, constraint=None): + """ + Creates a kernel that should be saved as an attribute of the caller class + name: name of the kernel + shape: tuple with its shape + initializer: one of the initializers from this class (actually, any keras initializer) + trainable: if it is + constraint: one of the constraints from this class (actually, any keras constraints) + """ + kernel = self.add_weight( + name=name, shape=kernel_shape, initializer=initializer, trainable=trainable, constraint=constraint + ) + return kernel + + # Implemented initializers + @staticmethod + def init_constant(value): + return Constant(value=value) + + @staticmethod + def select_initializer(ini_name, seed=None, **kwargs): + """ + Selects one of the initializers (which does initialize, i.e., not constant) + All of them should accept seed + """ + try: + ini_tuple = MetaLayer.initializers[ini_name] + except KeyError as e: + raise NotImplementedError( + f"[MetaLayer.select_initializer] initializer not implemented: {ini_name}" + ) from e + + ini_class = ini_tuple[0] + ini_args = ini_tuple[1] + ini_args["seed"] = seed + + for key, value in kwargs.items(): + if key in ini_args.keys(): + ini_args[key] = value + return ini_class(**ini_args) + + # Make numpy array into a tensor + def np_to_tensor(self, np_array, **kwargs): + """ + Given a numpy array, returns a constant tensor + """ + return K.constant(np_array, **kwargs) + + # Common tensor operations + def tensor_ones(self, shape, **kwargs): + """ + Generates a tensor of ones of the given shape + """ + return K.ones(shape, **kwargs) + + def tensor_ones_like(self, tensor, **kwargs): + """ + Generates a tensor of ones of the same shape as the input tensor + """ + return K.ones_like(tensor, **kwargs) + + def many_replication(self, grid, replications, axis=0, **kwargs): + """ + Generates a tensor with one extra dimension: + a repetition of "grid" n times along the given axis + from keras documentation: + If x has shape (s1, s2, s3) and axis is 1, the output will have shape (s1, s2 * rep, s3) + """ + return K.repeat_elements(grid, rep=replications, axis=axis, **kwargs) + + def sum(self, tensor, axis=None, **kwargs): + """ + Computes the sum of the elements of the tensor + """ + return K.sum(tensor, axis=axis, **kwargs) + + def tensor_product(self, tensor_x, tensor_y, axes, **kwargs): + """ + Computes the tensordot product between tensor_x and tensor_y + """ + return K.tf.tensordot(tensor_x, tensor_y, axes=axes, **kwargs) + + def transpose(self, tensor, **kwargs): + """ + Transpose a tensor + """ + return K.transpose(tensor, **kwargs) + + def boolean_mask(self, tensor, mask, axis=None, **kwargs): + """ + Applies boolean mask to a tensor + """ + return K.tf.boolean_mask(tensor, mask, axis=axis, **kwargs) + + def concatenate(self, tensor_list, axis=-1, target_shape=None): + """ + Concatenates a list of numbers or tenosr into a bigger tensor + If the target shape is given, the output is reshaped to said shape + """ + concatenated_tensor = K.concatenate(tensor_list, axis=axis) + if 
target_shape: + return K.reshape(concatenated_tensor, target_shape) + else: + return concatenated_tensor + + def permute_dimensions(self, tensor, permutation, **kwargs): + """ + Receives a tensor and a tuple and permutes the axes of the tensor according to it. + i.e. + if permutation = (1,0,2) + does the permutation: axis_0 -> axis_1, axis_1 -> axis_0, axis_2 -> axis_2 + """ + return K.permute_dimensions(tensor, permutation, **kwargs) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py new file mode 100644 index 0000000000..17f1a1944f --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -0,0 +1,105 @@ +""" + MetaModel class + + Extension of the backend Model class containing some wrappers in order to absorb other + backend-dependent calls +""" +import numpy as np +from keras.models import Model +from keras.layers import Input +import keras.optimizers as Kopt +import keras.backend as K + + +class MetaModel(Model): + """ + The goal of this class is to absorb all keras dependent code + """ + + # Define in this dictionary new optimizers as well as the arguments they accept (with default values if needed be) + optimizers = { + 'RMSprop' : ( + Kopt.RMSprop, {'lr': 0.01} + ), + 'Adam' : ( + Kopt.Adam, {'lr' : 0.01} + ), + 'Adagrad' : ( + Kopt.Adagrad, {} ), + 'Adadelta' : ( + Kopt.Adadelta, {} ), + 'Adamax' : ( + Kopt.Adamax, {} ), + 'Nadam' : ( + Kopt.Nadam, {} ), + 'Amsgrad' : ( + Kopt.Adam, {'lr' : 0.01, 'amsgrad' : True } + ), + } + + def __init__(self, input_tensors, output_tensors, extra_tensors=None): + """ + This class behaves as keras.models.Model with some add-ons: + + if extra_tensors are given, they should come in the combination + (input: np.array, output_layer: function) so that they can be fed to keras as output_layer(*input) + + This function will turn all inputs into keras tensors + """ + input_list = input_tensors + output_list = output_tensors + + if not isinstance(input_list, list): + input_list = [input_list] + if not isinstance(output_list, list): + output_list = [output_list] + + # Add extra tensors + if extra_tensors is not None: + for ii, oo in extra_tensors: + inputs = [] + if isinstance(ii, list): + for i in ii: + inputs.append(self._np_to_input(i)) + else: + inputs = [self._np_to_input(ii)] + o_tensor = oo(*inputs) + input_list += inputs + output_list.append(o_tensor) + + super(MetaModel, self).__init__(input_list, output_list) + + def _np_to_input(self, x): + """ + If x is a numpy array, make it into a numpy tensor + """ + if isinstance(x, np.ndarray): + tensor = K.constant(x) + return Input(tensor=tensor) + else: + return x + + def compile(self, optimizer_name="RMSprop", learning_rate=0.05, loss=None, **kwargs): + """ + Compile the model given: + - Optimizer + - Learning Rate + - List of loss functions + """ + try: + opt_tuple = self.optimizers[optimizer_name] + except KeyError as e: + raise NotImplementedError( + f"[MetaModel.select_initializer] optimizer not implemented: {optimizer_name}" + ) from e + + opt_function = opt_tuple[0] + opt_args = opt_tuple[1] + + if "lr" in opt_args.keys(): + opt_args["lr"] = learning_rate + + opt_args["clipnorm"] = 1.0 + opt = opt_function(**opt_args) + + super(MetaModel, self).compile(optimizer=opt, loss=loss, **kwargs) diff --git a/n3fit/src/n3fit/backends/keras_backend/__init__.py b/n3fit/src/n3fit/backends/keras_backend/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/n3fit/src/n3fit/backends/keras_backend/base_layers.py b/n3fit/src/n3fit/backends/keras_backend/base_layers.py new file mode 100644 index 0000000000..6f372eb5ff --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/base_layers.py @@ -0,0 +1,71 @@ +""" + For a layer to be used by n3fit it should be contained in the layers dictionary below + This dictionary has the following structure: + + 'name of the layer' : ( Layer_class, {dictionary of arguments: defaults} ) +""" + +from keras.layers import Dense, Lambda, LSTM, Dropout, concatenate +from keras.backend import expand_dims + + +def LSTM_modified(**kwargs): + """ + LSTM asks for a sample X timestep X features kind of thing so we need to reshape the input + """ + the_lstm = LSTM(**kwargs) + ExpandDim = Lambda(lambda x: expand_dims(x, axis=-1)) + + def ReshapedLSTM(input_tensor): + if len(input_tensor.shape) == 2: + reshaped = ExpandDim(input_tensor) + return the_lstm(reshaped) + else: + return the_lstm(input_tensor) + + return ReshapedLSTM + + +layers = { + "dense": ( + Dense, + { + "input_shape": (1,), + "kernel_initializer": "glorot_normal", + "units": 5, + "activation": "sigmoid", + }, + ), + "LSTM": ( + LSTM_modified, + {"kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid"}, + ), + "dropout": (Dropout, {"rate": 0.0}), +} + + +def base_layer_selector(layer_name, **kwargs): + """ + Given a layer name, looks for it in the `layers` dictionary and returns an instance. + + The layer dictionary defines a number of defaults but they can be overwritten/enhanced through kwargs + + # Arguments: + - `layer_name`: str with the name of the layer + - `**kwargs`: extra optional arguments to pass to the layer (beyond their defaults) + """ + try: + layer_tuple = layers[layer_name] + except KeyError as e: + raise NotImplementedError( + "Layer not implemented in keras_backend/base_layers.py: {0}".format(layer_name) + ) from e + + layer_class = layer_tuple[0] + layer_args = layer_tuple[1] + + for key, value in kwargs.items(): + if key in layer_args.keys(): + layer_args[key] = value + + return layer_class(**layer_args) diff --git a/n3fit/src/n3fit/backends/keras_backend/constraints.py b/n3fit/src/n3fit/backends/keras_backend/constraints.py new file mode 100644 index 0000000000..4a5a8247c6 --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/constraints.py @@ -0,0 +1,22 @@ +""" + Implementations of weight constraints for initializers +""" + +from keras.constraints import MinMaxNorm +from keras import backend as K + + +class MinMaxWeight(MinMaxNorm): + """ + Small override to the MinMaxNorm Keras class to not look at the absolute value + This version looks at the sum instead of at the norm + """ + + def __init__(self, min_value, max_value, **kwargs): + super(MinMaxWeight, self).__init__(min_value=min_value, max_value=max_value, **kwargs) + + def __call__(self, w): + norms = K.sum(w, axis=self.axis, keepdims=True) + desired = self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms + w *= desired / (K.epsilon() + norms) + return w diff --git a/n3fit/src/n3fit/backends/keras_backend/internal_state.py b/n3fit/src/n3fit/backends/keras_backend/internal_state.py new file mode 100644 index 0000000000..82c6bea888 --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/internal_state.py @@ -0,0 +1,51 @@ +""" + Library of functions that modify the internal state of Keras/Tensorflow +""" + +import numpy as np +import random as rn +from keras import backend as K + + +def set_initial_state(seed=13): + """ + 
Sets the initial state of the backend + This is the only way of getting reproducible results for keras-tensorflow + + This function needs to be called before __any__ tensorflow related stuff is called so + it will clear the keras session to ensure the initial state is set + + At the moment this is only enabled for debugging as forces the use of only one thread + """ + + np.random.seed(seed) + use_seed = np.random.randint(0, pow(2, 31)) + rn.seed(use_seed) + + # Clear the state of keras in case anyone used it before + K.clear_session() + + session_conf = K.tf.ConfigProto(intra_op_parallelism_threads=1, + inter_op_parallelism_threads=1) + K.tf.set_random_seed(use_seed) + sess = K.tf.Session(graph=K.tf.get_default_graph(), config=session_conf) + K.set_session(sess) + + return 0 + + +def clear_backend_state(): + """ + Clears the state of the backend and opens a new session. + + Note that this function needs to set the TF session, including threads and processes + i.e., this function must NEVER be called after setting the initial state. + """ + print("Clearing session") + + K.clear_session() + # Don't open threads that you are not going to eat + session_conf = K.tf.ConfigProto(intra_op_parallelism_threads=2, + inter_op_parallelism_threads=8) + sess = K.tf.Session(graph=K.tf.get_default_graph(), config=session_conf) + K.set_session(sess) diff --git a/n3fit/src/n3fit/backends/keras_backend/losses.py b/n3fit/src/n3fit/backends/keras_backend/losses.py new file mode 100644 index 0000000000..b8f07d8a90 --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/losses.py @@ -0,0 +1,34 @@ +""" + Module containing a list of loss functions availables to the fitting code +""" + +import keras.backend as K + + +def l_invcovmat(invcovmat_np): + """ + Returns a loss function such that: + L = \sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j} + """ + invcovmat = K.constant(invcovmat_np) + + def true_loss(y_true, y_pred): + # (yt - yp) * covmat * (yt - yp) + tmp = y_true - y_pred + right_dot = K.tf.tensordot(invcovmat, K.transpose(tmp), axes=1) + return K.tf.tensordot(tmp, right_dot, axes=1) + + return true_loss + + +def l_positivity(alpha=1e-7): + """ + Returns L = elu(y_pred) (considers y_true as 0) + """ + + def true_loss(y_true, y_pred): + y = -y_pred + loss = K.elu(y, alpha=alpha) + return K.sum(loss) + + return true_loss diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py new file mode 100644 index 0000000000..e189bbe377 --- /dev/null +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -0,0 +1,142 @@ +""" + This module containg a list of useful operations translated in the keras language + + The main goal was (is) to implement the NNPDF operations on fktable in the keras + language (hence the mapping `c_to_py_fun`) + + All operations accept as an input an iterable of keras layers with + the same output_shape (usually the output of an Observable layer) + and return a keras operation with, again, the same output shape. 
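+
+    A minimal illustrative sketch (the layer names below are hypothetical):
+
+        ratio_op = c_to_py_fun("RATIO", "my_observable")
+        obs_layer = ratio_op([numerator_layer, denominator_layer])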
+""" + +from keras.layers import add as keras_add +from keras.layers import subtract as keras_subtract +from keras.layers import Lambda as keras_Lambda +from keras.layers import multiply as keras_multiply + +from keras.layers import Input +from keras import backend as K + + +def numpy_to_input(numpy_array): + return Input(tensor=K.constant(numpy_array)) + + +def c_to_py_fun(op_name, name, default="ADD"): + """ + Map between the NNPDF operations and the operations defined in this file + Any new backend must implement such a mapping + """ # TODO: shouldn't this be outside of the backend folder then + # surely... + d = { + 'NULL' : op_null, + 'ADD' : op_add, + 'RATIO' : op_ratio, + 'ASY' : op_asy, + 'SMN' : op_smn, + } + if op_name not in d.keys(): + print("Operation name not recognised, defaulting to {0}".format(default)) + return d[default] + + def operation_fun(o_list): + return d[op_name](o_list, name=name) + + return operation_fun + + +def op_multiply(o_list, **kwargs): + """ + Receives a list of layers of the same output size and multiply them element-wise + """ + return keras_multiply(o_list, **kwargs) + + +def op_multiply_dim(o_list, **kwargs): + """ + Bypass in order to multiply two layers with different output dimension + for instance: (10000 x 14) * (14) + as the normal keras multiply don't accept it (but somewhow it does accept it doing it like this) + """ + if len(o_list) != 2: + raise ValueError( + "The number of observables is incorrect, operations.py:op_multiply_dim, expected 2, received {0}".format( + len(o_list) + ) + ) + create_operation = keras_Lambda(lambda inputs: inputs[0] * inputs[1]) + return create_operation(o_list) + + +def op_null(o_list, **kwargs): + """ + Not a compound object, do nothing + """ + return o_list[0] + + +def op_add(o_list, **kwargs): + """ + Sum a list of layers with the same output dim + """ + return keras_add(o_list, **kwargs) + + +def op_subtract(o_list, **kwargs): + """ + Subtract all observables + """ + return keras_subtract(o_list) + + +def op_log(o_tensor, **kwargs): + """ + Computes the logarithm of the input + """ + return K.log(o_tensor) + + +def op_ratio(o_list, **kwargs): + """ + Take the ratio of two observables + """ + if len(o_list) != 2: + raise ValueError( + "The number of observables is incorrect, operations.py:op_ratio, expected 2, received {0}".format( + len(o_list) + ) + ) + + division_layer = keras_Lambda(lambda inputs: inputs[0] / inputs[1], **kwargs) + return division_layer(o_list) + + +def op_asy(o_list, **kwargs): + """ + Perform the asymmetry operation on two observables + """ + if len(o_list) != 2: + raise ValueError( + "The number of observables is incorrect, operations.py:op_asy, expected 2, received {0}".format( + len(o_list) + ) + ) + + subtraction = keras_subtract(o_list) + addition = op_add(o_list) + return op_ratio([subtraction, addition], **kwargs) + + +def op_smn(o_list, **kwargs): + """ + Normalised sum + """ + if len(o_list) != 4: + raise ValueError( + "The number of observables is incorrect, operations.py:op_smn, expected 4, received {0}".format( + len(o_list) + ) + ) + numer = op_add(o_list[:2]) + denom = op_add(o_list[2:]) + return op_ratio([numer, denom], **kwargs) diff --git a/n3fit/src/n3fit/fit.py b/n3fit/src/n3fit/fit.py new file mode 100644 index 0000000000..c3cb84375c --- /dev/null +++ b/n3fit/src/n3fit/fit.py @@ -0,0 +1,311 @@ +""" + Fit action controller +""" + +# Backend-independent imports +import sys +import logging +import os.path +import numpy as np + +log = logging.getLogger(__name__) + +# 
Action to be called by validphys +# All information defining the NN should come here in the "parameters" dict +def fit( + fitting, + experiments, + t0set, + replica, + replica_path, + output_path, + theoryid, + posdatasets, + hyperscan=None, + hyperopt=None, + debug=False, + create_test_card=None, +): + """ + This action will (upon having read a validcard) process a full PDF fit for a given replica. + + The input to this function is provided by validphys and defined in the runcards or commandline arguments. + + The workflow of this controller is as follows: + 1. Using the replica number and the seeds defined in the runcard, generates a seed for everything + 2. Read up all datasets from the given experiments and create the necessary replicas + 2.1 Read up also the positivity sets + 3. Generate a ModelTrainer object which hold all information to create the NN and perform a fit + (at this point no NN object has been generated) + 3.1 (if hyperopt) generates the hyperopt scanning dictionary taking as a base the fitting dictionary + and the hyperscan dictionary defined in the runcard + 4. Pass the dictionary of parameters to ModelTrainer for the NN to be generated and the fit performed + 4.1 (if hyperopt) Loop over point 4 for `hyperopt` number of times + 5. Once the fit is finished, output the PDF grid and accompanying files + + # Arguments: + - `fitting`: dictionary with the hyperparameters of the fit + - `experiments`: vp list of experiments to be included in the fit + - `t0set`: t0set name + - `replica`: a list of replica numbers to run over (tipycally just one) + - `replica_path`: path to the output of this run + - `output_path`: name of the fit + - `theorid`: theory id number + - `posdatasets` : list of positivity datasets + - `hyperscan`: dictionary containing the details of the hyperscan + - `hyperopt`: if given, number of hyperopt iterations to run + - `debug`: activate some debug options + - `create_test_card`: read the runcard and output a new runcard with a test-set defined + """ + + # Call the script to generate a runcard with a test-set and immediately exit + if create_test_card: + from n3fit.hyper_optimization.create_testset import create_testset + + create_testset(experiments, runcard_file=create_test_card) + return 0 + ############### + + if debug: + # If debug is active, fix the initial state this should make the run reproducible + # (important to avoid non-deterministic multithread or hidden states) + from n3fit.backends import set_initial_state + + set_initial_state() + ############### + + # All potentially backend dependent imports should come inside the fit function + # so they can eventually be set from the runcard + from n3fit.ModelTrainer import ModelTrainer + from n3fit.io.writer import storefit + from n3fit.backends import MetaModel + import n3fit.io.reader as reader + import n3fit.msr as msr_constraints + + if hyperopt: + import hyperopt as hyper + import n3fit.hyper_optimization.filetrials as filetrials + + status_ok = hyper.STATUS_OK + else: + status_ok = "ok" + + # Loading t0set from LHAPDF + if t0set is not None: + t0pdfset = t0set.load_t0() + + # First set the seed variables for + # - Tr/Vl split + # - Neural Network initialization + # - Replica generation + # These depend both on the seed set in the runcard and the replica number + trvalseeds = [] + nnseeds = [] + mcseeds = [] + for replica_number in replica: + np.random.seed(fitting.get("trvlseed")) + for i in range(replica_number): + trvalseed = np.random.randint(0, pow(2, 31)) + + 
np.random.seed(fitting.get("nnseed")) + for i in range(replica_number): + nnseed = np.random.randint(0, pow(2, 31)) + + np.random.seed(fitting.get("mcseed")) + for i in range(replica_number): + mcseed = np.random.randint(0, pow(2, 31)) + trvalseeds.append(trvalseed) + nnseeds.append(nnseed) + mcseeds.append(mcseed) + + if fitting.get("genrep") == 0: + mcseeds = [] + + ############################################################################## + # ### Read files + # Loop over all the experiment and positivity datasets + # and save them into two list of dictionaries: exp_info and pos_info + # later on we will loop over these two lists and generate the actual layers + # i.e., at this point there is no information about the NN + # we are just creating dictionaries with all the necessary information + # (experimental data, covariance matrix, replicas, etc, tr/val split) + ############################################################################## + all_exp_infos = [[] for _ in replica] + # First loop over the experiments + for exp in experiments: + log.info("Loading experiment: {0}".format(exp)) + all_exp_dicts = reader.common_data_reader(exp, t0pdfset, replica_seeds=mcseeds, trval_seeds=trvalseeds) + for i, exp_dict in enumerate(all_exp_dicts): + all_exp_infos[i].append(exp_dict) + + # Now read all the positivity datasets + pos_info = [] + for pos_set in posdatasets: + log.info("Loading positivity dataset {0}".format(pos_set)) + pos_dict = reader.positivity_reader(pos_set) + pos_info.append(pos_dict) + + # Note: In the basic scenario we are only running for one replica and thus this loop is only + # run once and all_exp_infos is a list of just than one element + for replica_number, exp_info, nnseed in zip(replica, all_exp_infos, nnseeds): + replica_path_set = replica_path / "replica_{0}".format(replica_number) + log.info("Starting replica fit {0}".format(replica_number)) + + # Generate a ModelTrainer object + # this object holds all necessary information to train a PDF (still no information about the NN) + the_model_trainer = ModelTrainer( + exp_info, pos_info, fitting["basis"], nnseed, pass_status=status_ok, debug=debug + ) + + # Check whether we want to load weights from a file (maybe from a previous run) + # check whether the file exists, otherwise set it to none + # reading the data up will be done by the model_trainer + if fitting.get("load"): + model_file = fitting.get("loadfile") + log.info(" > Loading the weights from previous training from {0}".format(model_file)) + if not os.path.isfile(model_file): + log.warning(" > Model file {0} could not be found".format(model_file)) + model_file = None + else: + the_model_trainer.model_file = model_file + + # This is just to give a descriptive name to the fit function + pdf_gen_and_train_function = the_model_trainer.hyperparametizable + + # Read up the parameters of the NN from the runcard + parameters = fitting.get("parameters") + + ######################################################################## + # ### Hyperopt # + # If hyperopt is active the parameters of NN will be substituted by the# + # hyoperoptimizable variables. 
# + # Hyperopt will run for --hyperopt number of iterations before leaving # + # this block # + ######################################################################## + if hyperopt: + from n3fit.hyper_optimization.HyperScanner import HyperScanner + + the_scanner = HyperScanner(parameters) + + stopping_options = hyperscan.get("stopping") + positivity_options = hyperscan.get("positivity") + optimizer_options = hyperscan.get("optimizer") + architecture_options = hyperscan.get("architecture") + + # Enable scanner for certain parameters + the_scanner.stopping(**stopping_options) + the_scanner.positivity(**positivity_options) + the_scanner.optimizer(**optimizer_options) + # Enable scanner for specific architectures + the_scanner.NN_architecture(**architecture_options) + + # Tell the trainer we are doing hyperopt + the_model_trainer.set_hyperopt(on=True, keys=the_scanner.hyper_keys) + + # Generate Trials object + trials = filetrials.FileTrials(replica_path_set, log=log, parameters=parameters) + + # Perform the scan + try: + best = hyper.fmin( + fn=pdf_gen_and_train_function, + space=the_scanner.dict(), + algo=hyper.tpe.suggest, + max_evals=hyperopt, + trials=trials, + ) + except ValueError as e: + print("Error from hyperopt because no best model was found") + print("@fit.py, setting the best trial to empty dict") + print("Exception: {0}".format(e)) + sys.exit(0) + + # Now update the parameters with the ones found by the scan + true_best = hyper.space_eval(parameters, best) + the_scanner.update_dict(true_best) + parameters = the_scanner.dict() + print("##################") + print("Best model found: ") + for k, i in true_best.items(): + print(" {0} : {1} ".format(k, i)) + ####################################################################### end of hyperopt + + # Ensure hyperopt is off + the_model_trainer.set_hyperopt(on=False) + + ############################################################################# + # ### Fit # + # This function performs the actual fit, it reads all the parameters in the # + # "parameters" dictionary, uses them to generate the NN and trains the net # + ############################################################################# + result = pdf_gen_and_train_function(parameters) + + # After the fit is run we get a 'result' dictionary with the following items: + validation_object = result["validation_object"] + layer_pdf = result["layer_pdf"] + layers = result["layers"] + integrator_input = result["integrator_input"] + true_chi2 = result["loss"] + training = result["training"] + log.info("Total exp chi2: {0}".format(true_chi2)) + + # Where has the stopping point happened (this is only for debugging purposes) + print( + """ + > > The stopping point has been at: {0} with a loss of {1} + which it got at {2}. 
Stopping degree {3} + Positivity state: {4} + """.format( + validation_object.epoch_of_the_stop, + validation_object.loss(), + validation_object.e_best_chi2, + validation_object.stopping_degree, + validation_object.positivity_pass(), + ) + ) + + # Compute the arclengths + arc_lengths = msr_constraints.compute_arclength(layers["fitbasis"], verbose=True) + # Construct the chi2exp file + allchi2_lines = validation_object.chi2exps_str() + # Construct the preproc file + preproc_lines = validation_object.preproc_str() + + # Creates a PDF model for export grid + def pdf_function(export_xgrid): + """ + Receives an array, returns the result of the PDF for said array + """ + modelito = MetaModel([integrator_input], [], extra_tensors=[(export_xgrid, layer_pdf)]) + result = modelito.predict(x=None, steps=1) + return result + + # export PDF grid to file + storefit( + pdf_function, + replica_number, + replica_path_set, + output_path.stem, + theoryid.get_description().get("Q0") ** 2, + validation_object.epoch_of_the_stop, + validation_object.loss(), + validation_object.tr_loss(), + true_chi2, + arc_lengths, + allchi2_lines, + preproc_lines, + validation_object.positivity_pass(), + ) + + # Save the weights to some file + if fitting.get("save"): + model_file = fitting.get("savefile") + log.info(" > Saving the weights for future in {0}".format(model_file)) + training["model"].save_weights(model_file) + + # Plot the validation and the training losses + if fitting.get("plot"): + validation_object.plot() + + # print out the integration of the sum rule in case we want to check it's not broken + # msr_constraints.check_integration(layer_pdf, integrator_input) diff --git a/n3fit/src/n3fit/hyper_optimization/HyperAlgorithm.py b/n3fit/src/n3fit/hyper_optimization/HyperAlgorithm.py new file mode 100644 index 0000000000..df593c05d5 --- /dev/null +++ b/n3fit/src/n3fit/hyper_optimization/HyperAlgorithm.py @@ -0,0 +1,461 @@ +import itertools +import numpy as np + + +class HyperAlgorithm: + def __init__( + self, + key_info, + verbose=True, + k_good="good", + k_loss="loss", + threshold_reward=0.0, + fail_threshold=75, + how_many_to_kill=2, + remove_by_key=False, + ): + """ + key_info: dictionary of {'keyname' : [list,of,possible,values]} + """ + + self.key_info = key_info + + self.k_good = k_good + self.k_loss = k_loss + + self.verbose = verbose + self.remove_by_key = remove_by_key + + self.threshold_reward = threshold_reward + self.failing_reward = -100 + self.fail_threshold = fail_threshold + + self.how_many_to_kill = how_many_to_kill + + # Some variables are not discrete, find them + update_dict = {} + for key, values in key_info.items(): + try: # Remove nan if there are + np_values = np.array(values) + np_values = np_values[~np.isnan(np_values)] + except: + np_values = values + l = len(np_values) + if l > 10: # break it into five chuncks + np_values.sort() + new_vals = [] + step = int(l / 5) + for i in range(0, l - step, step): + ini = np_values[i] + fin = np_values[i + step] + new_vals.append((ini, fin)) + update_dict[key] = new_vals + key_info.update(update_dict) + + self.continuous = update_dict + self.biggest_total = 1 + self.perfect_reward = 100.0 + + def __dataframe_killer(self, dataframe, hit_list): + """ + Remove from the dataframe all entries included in the hit_list + """ + if not hit_list: + return dataframe + indices_to_remove = hit_list[0]["slice"].index + for victim in hit_list[1:]: + indices_to_remove = indices_to_remove.append(victim["slice"].index) + indices_to_drop = 
indices_to_remove.drop_duplicates() + return dataframe.drop(indices_to_drop) + + def __verbose_elimination(self, md, printout=True): + """ + Show a log of everything that is being eliminated + if there is only one item in the dictionary, + it also removes it from the combination list just to avoid looking at useless combinations + """ + combination = md["combination"] + str_list = [] + for key, item in combination.items(): + str_list.append("{0}={1}".format(key, item)) + reward = md["reward"] + if printout: + self.__print("Removing: {0}\n reward: {1:.5f}".format("\n ".join(str_list), reward)) + + def __print(self, string, **kwargs): + if self.verbose: + print(string, **kwargs) + + def __compute_reward(self, md): + """ Computes a reward function from the information we have """ + frate = md["f_rate"] + ngood = md["n_good"] + ntotal = md["n_total"] + # If the fail rate is greater than 75%, kill it + if frate > self.fail_threshold: + return self.failing_reward + dstd = md["median"] - md["best_loss"] + rate_good = ngood / ntotal * 100.0 + + # The reward can be negative + reward = (50.0 - frate) / 100.0 + reward -= dstd / rate_good # the less spread the better + # Finally scale it with the total number of points + weight_points = 1.0 + ntotal / self.biggest_total + reward *= weight_points + + return reward + + def print_ranges(self, dataframe, keys=None, dataframe_original=None): + """ + Receives a dataframe and a number of keys and prints the surviving keys and ranges + If dataframe_original is given, will also print (for discrete values) + how many of each have survived from the total + """ + # First get the keys we are printing + keys = self.key_info.keys() + + # Now compute all rewards + key_rewards = {} + for key in keys: + combinations = self.get_combinations(1, single_key=key) + key_rewards[key] = [] + for combination in combinations: + md = self.study_combination(dataframe, combination) + key_rewards[key].append(md) + + if dataframe_original is None: + prev = "" + else: + prev = "of {0}".format(len(dataframe_original)) + print("The total number of survivors is {0} {1}".format(len(dataframe), prev)) + + # Now do the printing + for key, results in key_rewards.items(): + print("For key: {0}".format(key)) + list_str = [] + results.sort(key=lambda i: -i["reward"]) + for md in results: + val = md["combination"][key] + reward = md["reward"] + list_str.append(" Reward: {1:.3f}: {0} ".format(val, reward)) + print("\n".join(list_str)) + return + + for key in keys: + df_slice = dataframe[key] + values = set(df_slice) + if len(values) > 5: + ini = df_slice.min() + fin = df_slice.max() + string = " range: ({0}, {1}) ".format(ini, fin) + else: + if dataframe_original is not None: + originals = dataframe_original[key].value_counts() + news = df_slice.value_counts() + string = " values: " + for val in values: + string += "{0} ({1}/{2} {3:2.0f}%), ".format( + val, news[val], originals[val], news[val] / originals[val] * 100.0 + ) + string = string[:-2] + else: + string = " values: {0}".format(values) + + self.__print("Key: {0}\n > {1}".format(key, string)) + + def __process_slice(self, df_slice): + """ Function to process a slice into a dictionary with interesting stats + If the slice is None it means the combination does not apply + """ + if df_slice is None: + md = {"skip": True} + return md + else: + md = {"skip": False} + + good = df_slice[df_slice[self.k_good]] + md["n_total"] = len(df_slice) + md["n_good"] = len(good) + if md["n_good"] > self.biggest_total: + self.biggest_total = md["n_good"] + 
md["n_failed"] = md["n_total"] - md["n_good"] + if md["n_total"] == 0: + md["skip"] = True + return md + else: + f_rate = md["n_failed"] / md["n_total"] * 100 + md["f_rate"] = f_rate + # Compute for each the best loss (among the good ones) and the std dev + good_losses = good[self.k_loss] + md["best_loss"] = good_losses.min() + md["std"] = good_losses.std() + md["median"] = good_losses.median() + md["reward"] = self.__compute_reward(md) + return md + + def get_slice(self, dataframe, keys_info): + """ + Returns a slice of the dataframe where some keys match some values + keys_info must be a dictionary {key1 : value1, key2, value2 ...} + """ + df_slice = dataframe + for key, value in keys_info.items(): + # First check whether for this slice the values for this key are something different from NaN + key_column = df_slice[key] + if len(key_column.dropna()) == 0 and len(key_column) != 0: + return None + if key in self.continuous.keys(): + minim = value[0] + maxim = value[1] + df_slice = df_slice[key_column >= minim] + df_slice = df_slice[key_column <= maxim] + else: + df_slice = df_slice[key_column == value] + return df_slice + + def get_combinations(self, n, key_info=None, single_key=None): + """ + Get all combinations of n elements for the keys and values given in the dictionary key_info: + { 'key' : [possible values] }, if key_info is None, the dictionary used to initialize the class will be used + Returns a list of dictionaries such that: + [ + {'key1' : val1-1, 'key2', val2-1 ... }, + {'key1' : val1-2, 'key2', val2-1 ... }, + ... + ] + """ + if key_info is None: + key_info = self.key_info + if len(key_info) < n: + return [] + + if single_key: + single_options = key_info[single_key] + key_info = {single_key: single_options} + + key_combinations = itertools.combinations(key_info, n) + all_combinations = [] + for keys in key_combinations: + list_of_items = [key_info[key] for key in keys] + # list_of_items is a list of possible values: [ (val1-1, val1-2, val1-3...), (val2-1, val2-2...) ... ] + items_combinations = itertools.product(*list_of_items) + for values in items_combinations: + nd = dict(zip(keys, values)) + all_combinations.append(nd) + + return all_combinations + + def study_combination(self, dataframe, combination): + """ + Given a dataframe and a dictionary of {key1 : value1, key2: value2} + returns a dictionary with a number of stats for that combination + """ + df_slice = self.get_slice(dataframe, combination) + # If the combination does not applies (i.e., NaN) just skip it + md = self.__process_slice(df_slice) + md["slice"] = df_slice + md["combination"] = combination + return md + + def __sort_to_survive(self, info_dict, how_many_to_save): + """ + Receives a list of dictionaries with an entry called reward, returns the + 'n' best + """ + sorted_by_reward = sorted(info_dict, key=lambda i: i["reward"]) + sorted_by_reward.reverse() + # Everyone with perfect reward is saved + save_list = [] + for i in sorted_by_reward: + if how_many_to_save == 0: + break + if i["reward"] == self.perfect_reward: + save_list.append(i) + else: + save_list.append(i) + how_many_to_save -= 1 + return save_list + + def __sort_to_kill(self, info_dict, by_key=False, how_many_to_kill=None): + """ + Receives a list of dictionaries with an entry called reward, returns the + 'n' worst ones. 
+ If by key is True, returns the 'n' worst for each set of keys + """ + if how_many_to_kill is None: + how_many_to_kill = self.how_many_to_kill + if by_key: + list_by_key = {} + for md in info_dict: + keys = md["keys"] + if keys in list_by_key.keys(): + list_by_key[keys].append(md) + else: + list_by_key[keys] = [md] + hit_list = [] + for _, items in list_by_key.items(): + hit_list += self.__sort_to_kill(items) + else: + sorted_by_reward = sorted(info_dict, key=lambda i: i["reward"]) + hit_list = sorted_by_reward[:how_many_to_kill] + return hit_list + + def remove_failing(self, dataframe, n_combinations=1, key=None, fail_threshold=None, verbose=True): + """ + Remove entries with failing reward from the dataframe + """ + if fail_threshold is None: + set_fail_threshold = self.failing_reward + elif fail_threshold == "median": + set_fail_threshold = 100 + else: + set_fail_threshold = fail_threshold + combinations = self.get_combinations(n_combinations, single_key=key) + hit_list = [] + all_combinations = [] + + for combination in combinations: + md = self.study_combination(dataframe, combination) + if md["skip"]: + continue + reward = md["reward"] + if fail_threshold == "median": + all_combinations.append(md) + elif reward <= set_fail_threshold: + hit_list.append(md) + + if fail_threshold == "median": + sorted_by_reward = sorted(all_combinations, key=lambda i: i["reward"]) + # do like thanos + half = int(len(sorted_by_reward) / 2) + hit_list = sorted_by_reward[:half] + + new_dataframe = self.__dataframe_killer(dataframe, hit_list) + if verbose: + for i in hit_list: + print("Eliminated {0} with reward: {1}".format(i["combination"], i["reward"])) + return new_dataframe + + def check_wrapper_weaklings_killer(self, dataframe, n_combinations=1, key=None, verbose=False, kill_threshold=None): + """ + It starts killing the entry with the worst reward and won't stop until all remaining entries + for the given number of combinations, have a positive reward + """ + if kill_threshold is None: + set_kill_threshold = self.threshold_reward + elif kill_threshold is "median": + set_kill_threshold = 100 # ensures that someone dies here + else: + set_kill_threshold = kill_threshold + + # 1 - Get combinations + combinations = self.get_combinations(n_combinations, single_key=key) + # 2 - Run through all possible combinations and kill the weakest one + worst_reward = 1e5 + victim = None + all_combinations = [] + for combination in combinations: + md = self.study_combination(dataframe, combination) + if md["skip"]: + continue + all_combinations.append(md) + reward = md["reward"] + if reward < worst_reward and reward <= set_kill_threshold: + victim = md + worst_reward = reward + if verbose: + all_combinations.sort(key=lambda i: i["reward"]) + for i in all_combinations: + print("{0}, {1}".format(i["reward"], i["combination"])) + + if kill_threshold == "median": # Set the median which will be necessary for the rest of the iterations + rewards = [i["reward"] for i in all_combinations] + kill_threshold = np.median(rewards) + print(" > > Killing everything with reward below {0}".format(kill_threshold)) + + if victim is None: + # If there are no points below the reward threshold, go out and return the remaining dataframe + return dataframe + + # 3 - Kill the victim(s) + new_dataframe = self.__dataframe_killer(dataframe, [victim]) + self.__verbose_elimination(victim) + + # 4 - Keep on murdering + return self.check_wrapper_weaklings_killer( + new_dataframe, n_combinations, key=key, kill_threshold=kill_threshold + ) + + def 
check_wrapper(self, dataframe, n_combinations, kill=False, save_n_best=None): + """ + Input a dataframe and the number of possible combinations and prints out nice statistics + """ + + def get_str(comb, md): + key_strs = ["{0}={1}".format(key, item) for key, item in comb.items()] + comb_str = ", ".join(key_strs) + f_rate = md["f_rate"] + reward = md["reward"] + n_trials = md["n_total"] + failure_str = "Fail: {0:2.3f}% Reward: {2:0.3f} ({1:3d} trials)".format(f_rate, n_trials, reward) + return comb_str, failure_str, f_rate + + self.__print("\n > Check combinations of {0} keys".format(n_combinations)) + + # 1 - Get all possible combinations of the keys + combinations = self.get_combinations(n_combinations) + lines_to_print = [] + siz = 0 + # 2 - Now run for all possible combinations and parse the information + hit_list = [] + info_dict = [] + for combination in combinations: + md = self.study_combination(dataframe, combination) + if md["skip"]: + continue + reward = md["reward"] + if reward <= self.threshold_reward: + # Don't bother with this point, kill it later + hit_list.append(md) + else: + md["keys"] = tuple(sorted((combination.keys()))) + info_dict.append(md) + comb_str, fail_str, _ = get_str(combination, md) + if len(comb_str) > siz: + siz = len(comb_str) + lines_to_print.append((comb_str, fail_str, reward)) + + # 3 - If kill == true, remove the slices in the hit list + if kill: + # 3.1 - Sort the items by reward and get the 'how_many_to_kill' with the smallest reward + hit_list += self.__sort_to_kill(info_dict, by_key=self.remove_by_key) + print("Removing the worst combinations:") + new_dataframe = self.__dataframe_killer(dataframe, hit_list) + else: + new_dataframe = dataframe + + # 4 - If save_n_best, do the opposite + if save_n_best: + survivors_list = self.__sort_to_survive(info_dict, save_n_best) + # Everyone with a 100.0 reward must survive + indices_to_get = None + for survivor in survivors_list: + if indices_to_get is not None: + indices_to_get = indices_to_get.append(survivor["slice"].index) + else: + indices_to_get = survivor["slice"].index + indices_to_get = indices_to_get.drop_duplicates() + new_dataframe = dataframe.ix[indices_to_get] + + # 5 - Now print all lines, making sure the align nicely + print("\n") + lines_to_print.sort(key=lambda x: x[-1]) + # If saving only the best X, print only those + if save_n_best: + lines_to_print = lines_to_print[-save_n_best:] + base_line = "{0:" + str(siz) + "s} {1}" + for line in lines_to_print: + self.__print(base_line.format(line[0], line[1])) + + return new_dataframe diff --git a/n3fit/src/n3fit/hyper_optimization/HyperScanner.py b/n3fit/src/n3fit/hyper_optimization/HyperScanner.py new file mode 100644 index 0000000000..f9da6098cd --- /dev/null +++ b/n3fit/src/n3fit/hyper_optimization/HyperScanner.py @@ -0,0 +1,230 @@ +import hyperopt as hyper +import numpy as np + +from n3fit.backends import MetaModel, MetaLayer + + +class HyperScanner: + """ + The HyperScanner generates a dictionary of parameters for scanning + It takes cares of known correlation between parameters by tying them together + It also provides methods for updating the parameter dictionaries after using hyperopt + + It manages: + stopping + optimizer + positivity + + NN architecture (WIP) + """ + + def __init__(self, parameters): + """ + Takes as input a dictionary of parameters + and saves it + """ + self.parameters = parameters + self.keys = parameters.keys() + self.hyper_keys = set([]) + self.dict_keys = set([]) + + def dict(self): + return self.parameters + + def 
update_dict(self, newdict): + self.parameters.update(newdict) + # Since some hyperparameters were made in the form of dictionaries, we need to "sviluppare" them + for key in self.dict_keys: + if key in newdict.keys(): + value = newdict[key] + # But this might be a conditional thing and not all of the possible options be dictionaries + if isinstance(value, dict): + self.update_dict(value) + + def _update_param(self, key, value, hyperopt=True, dkeys=None): + if dkeys is None: + dkeys = [] + if key not in self.keys: + raise ValueError( + "Trying to update a parameter that was not declared in the dictionary: {0}. HyperScanner @ _update_param".format( + key + ) + ) + if hyperopt: + self.hyper_keys.add(key) + print("Adding the key {0} with the following value: {1}".format(key, value)) + # Add possible extra keys + _ = [self.dict_keys.add(nk) for nk in dkeys] + self.parameters[key] = value + + # Calling the functions below turn parameters of the dictionary into hyperparameter scans + def stopping(self, min_epochs=5e3, max_epochs=30e3, min_patience=0.10, max_patience=0.3): + """ + Modifies the following entries of the parameter dictionary: + - epochs + - stopping_patience + """ + epochs_key = "epochs" + stopping_key = "stopping_patience" + + epoch_step = (max_epochs - min_epochs) / 4 + if min_epochs < epoch_step: + epoch_step = min_epochs + + epochs = hyper.hp.quniform(epochs_key, min_epochs, max_epochs, min_epochs) + stopping_patience = hyper.hp.quniform(stopping_key, min_patience, max_patience, 0.05) + + self._update_param(epochs_key, epochs) + self._update_param(stopping_key, stopping_patience) + + def optimizer(self, names=None, min_lr=0.0005, max_lr=0.5): + # But this might be a conditional thing and not all of the possible options be dictionaries + """ + This function look at the optimizers implemented in MetaModel and adds the learning rate to (only) + those who use it. The special keyword "ALL" will make it use all optimizers implemented + - optimizer + - learning rate + """ + if names is None: + names = ["RMSprop"] + + opt_key = "optimizer" + lr_key = "learning_rate" + + optimizer_dict = MetaModel.optimizers + choices = [] + + min_lr_exp = np.log(min_lr) + max_lr_exp = np.log(max_lr) + lr_choice = hyper.hp.loguniform(lr_key, min_lr_exp, max_lr_exp) + + if names == "ALL": + names = optimizer_dict.keys() + + for opt_name in names: + if opt_name not in optimizer_dict.keys(): + # Do we want an early crash or just drop the optimizer silently? We'll see... 
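+                # (current choice: crash early, so that a mistyped optimizer name is caught immediately)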
+ raise NotImplementedError("HyperScanner: Optimizer {0} not implemented in MetaModel.py".format(opt_name)) + + args = optimizer_dict[opt_name][1] + if "lr" in args.keys(): + choices.append({opt_key: opt_name, lr_key: lr_choice}) + else: + choices.append(opt_name) + + opt_val = hyper.hp.choice(opt_key, choices) + # Tell the HyperScanner this key might contain a dictionary to store it separately + self._update_param(opt_key, opt_val, dkeys=[opt_key]) + + def positivity(self, min_multiplier=1.01, max_multiplier=1.3, min_initial=0.5, max_initial=100): + """ + Modifies + - pos_multiplier + - pos_initial + """ + mul_key = "pos_multiplier" + if not min_initial and not max_initial: + ini_key = None + else: + ini_key = "pos_initial" + + mul_val = hyper.hp.uniform(mul_key, min_multiplier, max_multiplier) + self._update_param(mul_key, mul_val) + + if ini_key: + min_initial = np.log(min_initial) + max_initial = np.log(max_initial) + ini_val = hyper.hp.loguniform(ini_key, min_initial, max_initial) + self._update_param(ini_key, ini_val) + + def NN_architecture( + self, + n_layers=None, + max_units=50, + min_units=5, + activations=None, + initializers=None, + layer_types=None, + max_drop=0.0, + ): + """ + Uses all the given information to generate the parameters for the NN + - nodes_per_layer + - activation_per_layer + - initializer + - layer_type + - dropout + """ + if activations is None: + activations = ["sigmoid", "tanh"] + if initializers is None: + initializers = ["glorot_normal"] + if n_layers is None: + n_layers = [1, 2, 5] + if layer_types is None: + layer_types = ["dense"] + # Do we want to fix or generate dinamically the units? for now let's fix + # there will be time for more sofisticated functions + + act_key = "activation_per_layer" + nodes_key = "nodes_per_layer" + # the information contained in the variable is indeed the number of nodes per layer e.g. [5,10,30] + # but the information the user will be interested in is how many layers are there... + + # Generate the possible activation choices + act_choices = [] + for afun in activations: + # Just generate an array with as many copies of the str as the maximum number of layers + cop = [afun] * n_layers[-1] + # And append a linear at the end + cop.append("linear") + act_choices.append(cop) + + nodes_choices = [] + for n in n_layers: + units = [] + for i in range(n): + # We can even play games as lowering the maximum as the number of layers grows + units_label = "nl{1}:-{0}/{1}".format(i, n) + units.append(hyper.hp.quniform(units_label, min_units, max_units, 5)) + + # And then the last one is a dense with 8 units + units.append(8) + nodes_choices.append(units) + + act_functions = hyper.hp.choice(act_key, act_choices) + nodes = hyper.hp.choice(nodes_key, nodes_choices) + + # Now let's select the initializers looking at the ones implemented in MetaLayer + ini_key = "initializer" + imp_inits = MetaLayer.initializers + imp_init_names = imp_inits.keys() + if initializers == "ALL": + initializers = imp_init_names + + ini_choices = [] + for ini_name in initializers: + if ini_name not in imp_init_names: + # Do we want an early crash or just drop the optimizer silently? We'll see... 
+ raise NotImplementedError("HyperScanner: Initializer {0} not implemented in MetaLayer.py".format(ini_name)) + # For now we are going to use always all initializers and with default values + ini_choices.append(ini_name) + + ini_choice = hyper.hp.choice(ini_key, ini_choices) + + # Finally select the layer types + if layer_types: + layer_key = "layer_type" + layer_choices = hyper.hp.choice(layer_key, layer_types) + self._update_param(layer_key, layer_choices) + + # And add the dropout parameter + drop_key = "dropout" + n_drops = 3 + drop_step = max_drop / n_drops + drop_val = hyper.hp.quniform(drop_key, 0.0, max_drop, drop_step) + + self._update_param(act_key, act_functions) + self._update_param(nodes_key, nodes) + self._update_param(ini_key, ini_choice) + self._update_param(drop_key, drop_val) diff --git a/n3fit/src/n3fit/hyper_optimization/__init__.py b/n3fit/src/n3fit/hyper_optimization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/n3fit/src/n3fit/hyper_optimization/create_testset.py b/n3fit/src/n3fit/hyper_optimization/create_testset.py new file mode 100644 index 0000000000..9891577d92 --- /dev/null +++ b/n3fit/src/n3fit/hyper_optimization/create_testset.py @@ -0,0 +1,105 @@ +import os +import numpy as np + +from reportengine.compat import yaml + + +def test_runcard(runcard_file, datasets_out): + """ + Given a runcard and a set of datasets, generate a TEST-runcard where the datasets in datasets_out are removed + from the training and put together as a new experiment called TEST + """ + # Load the input runcard as a dictionary + f = open(runcard_file, "r") + runcard_dict = yaml.safe_load(f) + runcard_exp = runcard_dict["experiments"] + f.close() + + # Generate the list of datasets which are to be left out + data_tests = [] + for dataset in datasets_out: + data_tests.append({"dataset": dataset, "frac": 1.0}) # TODO copy the dictionary from the runcard instead + + # Create a test experiment + from n3fit.ModelTrainer import TESTNAME + test_experiment = { + 'experiment' : TESTNAME, + 'datasets' : data_tests + } + + # Now drop experiments and datasets that are being used for training + for experiment in runcard_exp: + datasets = list(filter(lambda x: x["dataset"] not in datasets_out, experiment["datasets"])) + if datasets: + experiment["datasets"] = datasets + else: + runcard_exp.remove(experiment) + + # Generate the new runcard + # TODO: maybe there is a proper report-enginy way of doing this + runcard_exp.append(test_experiment) + new_runcard = "TEST-{0}".format(os.path.basename(runcard_file)) + no = open(new_runcard, "w") + yaml.round_trip_dump(runcard_dict, no, explicit_start=True, explicit_end=True, default_flow_style=True) + no.close() + + print("\n > You can find your new runcard at {0}/{1}\n".format(os.getcwd(), new_runcard)) + + +def create_testset(experiments, runcard_file="runcards/NNPDF31_nlo_as_0118.yml"): + """ + This function gets the list of experiment and goes through the datasets + reading their experiment type and range in x + Depending on these, chooses the ones to leave out for testing + """ + + # 1) Read all datasets in the runcard and classify them by type + dataset_by_proc = {} + all_experiments = {} + for exp in experiments: + all_experiments[str(exp)] = len(exp.datasets) + for dataset in exp.datasets: + data_c = dataset.load() + proc_type = data_c.GetProc(0) + fktable = data_c.GetFK(0) + xgrid = fktable.get_xgrid() + xmin = np.min(xgrid) + xmax = np.max(xgrid) + data_dict = { + 'name' : str(dataset), + 'xmin' : xmin, + 'xmax' : xmax, + 
'exp' : str(exp), + } + if proc_type in dataset_by_proc.keys(): + dataset_by_proc[proc_type].append(data_dict) + else: + dataset_by_proc[proc_type] = [data_dict] + + select_min = False # If true selects the datasets with the smallest min(x), recommended: False + + # 2) Now for every process type with more than one dataset, leave out the smallest + datasets_out = [] + for proc_type, datasets in dataset_by_proc.items(): + l = len(datasets) + if l != 1: + xmin = 1.0 + xmax = 0.0 + for dataset in datasets: + xm = dataset["xmin"] + if select_min: + if xm < xmin: + xmin = xmin + data_out = dataset + else: + if xm > xmax: + xmax = xm + data_out = dataset + print("For {0} we find {1} datasets, leaving one out: {2}".format(proc_type, l, data_out)) + datasets_out.append(data_out["name"]) + all_experiments[dataset["exp"]] -= 1 + + if not datasets_out: + print("No redundant datasets were found") + else: + test_runcard(runcard_file, datasets_out) diff --git a/n3fit/src/n3fit/hyper_optimization/filetrials.py b/n3fit/src/n3fit/hyper_optimization/filetrials.py new file mode 100644 index 0000000000..17ee1a0809 --- /dev/null +++ b/n3fit/src/n3fit/hyper_optimization/filetrials.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" + Custom hyperopt trial object for persistent file storage +""" +import json +from hyperopt import Trials, space_eval + + +def space_eval_trial(space, trial): + for_eval = {} + for k, v in trial["misc"]["vals"].items(): + if len(v) == 0: + for_eval[k] = None + else: + for_eval[k] = v[0] + return space_eval(space, for_eval) + + +class FileTrials(Trials): + """ + Stores trial results on the fly. + """ + + def __init__(self, replica_path, log=None, parameters=None, exp_key=None, refresh=True): + self._store_trial = False + self._json_file = "{0}/tries.json".format(replica_path) + self._parameters = parameters + if log: + self.log_info = log.info + else: + self.log_info = print + super(FileTrials, self).__init__(exp_key=exp_key, refresh=refresh) + + def refresh(self): + super(FileTrials, self).refresh() + + # write json to disk + if self._store_trial: + self.log_info(f"Storing scan in {self._json_file}") + local_trials = [] + for idx, t in enumerate(self._dynamic_trials): + local_trials.append(t) + local_trials[idx]["misc"]["space_vals"] = space_eval_trial(self._parameters, t) + + all_to_str = json.dumps(local_trials, default=str) + with open(self._json_file, "w") as f: + f.write(all_to_str) + + def new_trial_ids(self, N): + self._store_trial = False + return super(FileTrials, self).new_trial_ids(N) + + def new_trial_docs(self, tids, specs, results, miscs): + self._store_trial = True + return super(FileTrials, self).new_trial_docs(tids, specs, results, miscs) diff --git a/n3fit/src/n3fit/hyper_optimization/plotting.py b/n3fit/src/n3fit/hyper_optimization/plotting.py new file mode 100755 index 0000000000..3e1dbfc59f --- /dev/null +++ b/n3fit/src/n3fit/hyper_optimization/plotting.py @@ -0,0 +1,701 @@ +#!/usr/bin/env python3 + +""" + Script to plot the hyperopt scans +""" + +import os +import json +import warnings +import re + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns + +warnings.simplefilter(action="ignore", category=FutureWarning) +regex_op = re.compile(r"[^\w^\.]+") +regex_not_op = re.compile(r"[\w\.]+") + +keywords = { + 'id' : 'iteration', + + 'optimizer' : 'optimizer', + 'lr' : 'learning_rate', + 'initializer' : 'initializer', + 'dropout' : 'dropout', + + 'nodes' : 'nodes_per_layer', + 'max_layers' : 4, + 'nl' 
: 'number_of_layers', + 'activation' : 'activation_per_layer', + 'architecture' : 'layer_type', + + 'epochs' : 'epochs', + 'stp' : 'stopping_patience', + 'ste' : 'stopping_epochs', + 'p_ini' : 'pos_initial', + 'p_mul' : 'pos_multiplier', + + 'good' : 'good', + 'vl' : 'validation_loss', + 'tl' : 'testing_loss', + 'loss' : 'loss', + } + +# 0 = normal scatter plot, 1 = violin, 2 = log +plotting_keys = [ + (keywords['id'], 0), + (keywords['optimizer'], 1), + (keywords['lr'], 2, [2e-4, 4e-1]), + (keywords['initializer'], 1), + + (keywords['epochs'], 0), + (keywords['ste'], 0), + (keywords['stp'], 0), + (keywords['p_mul'], 0), + + (keywords['nl'], 1), + (keywords['activation'], 1), + (keywords['dropout'], 0), + (keywords['tl'], 0), + + ] + +# for i in range(4): +# plotting_keys.append( ('layer_{0}'.format(i+1),0) ) + + +# Family parsing +def optimizer_parse(trial, dict_out): + """ + This function parses the family of parameters which regards the optimizer: + + optimizer + learning_rate (when it exists) + initializer + dropout + """ + opt = trial["misc"]["space_vals"][keywords["optimizer"]] + if isinstance(opt, dict): + # if it is a dict the optimizer includes extra information + name = opt[keywords["optimizer"]] + lr = opt[keywords["lr"]] + else: + name = opt + lr = None + ini = trial["misc"]["space_vals"][keywords["initializer"]] + dropout_rate = trial["misc"]["space_vals"][keywords["dropout"]] + + dict_out[keywords["optimizer"]] = name + dict_out[keywords["lr"]] = lr + dict_out[keywords["initializer"]] = ini + dict_out[keywords["dropout"]] = dropout_rate + + +def architecture_parser(trial, dict_out): + """ + This function parses the family of parameters which regards the architecture of the NN + + number_of_layers + activation_per_layer + nodes_per_layer + l1, l2, l3, l4... 
max_layers + layer_type + """ + nodes_per_layer = trial["misc"]["space_vals"][keywords["nodes"]] + nl = len(nodes_per_layer) - 1 + activation_name = trial["misc"]["space_vals"][keywords["activation"]][0] + architecture = trial["misc"]["space_vals"][keywords["architecture"]] + + dict_out[keywords["nodes"]] = nodes_per_layer + dict_out[keywords["nl"]] = nl + dict_out[keywords["activation"]] = activation_name + dict_out[keywords["architecture"]] = architecture + + # In principle we might be checking any number of layers, + # so it is important to make sure that the maximum number of layers + # (which will be used at plotting time) is correct + if nl > keywords["max_layers"]: + keywords["max_layers"] = nl + + for i in range(keywords["max_layers"]): + dict_out["layer_{0}".format(i + 1)] = None + + for i, nodes in enumerate(nodes_per_layer[:-1]): + dict_out["layer_{0}".format(i + 1)] = nodes + + +def stopping_parser(trial, dict_out): + """ + Parses the family of parameters that define the stopping + + epochs + stopping_patience + pos_initial + pos_multiplier + """ + + epochs = trial["misc"]["space_vals"][keywords["epochs"]] + patience = trial["misc"]["space_vals"][keywords["stp"]] + stop_ep = patience * epochs + positivity_initial = trial["misc"]["space_vals"][keywords["p_ini"]] + positivity_multiplier = trial["misc"]["space_vals"][keywords["p_mul"]] + + dict_out[keywords["epochs"]] = epochs + dict_out[keywords["stp"]] = patience + dict_out[keywords["ste"]] = stop_ep + dict_out[keywords["p_ini"]] = positivity_initial + dict_out[keywords["p_mul"]] = positivity_multiplier + + +def stat_parser(trial, dict_out, threshold=1e3, val_f=0.5, **kwargs): + """ + Parser the stats information of the trial, the losses, the failures, etc + """ + test_f = 1.0 - val_f + validation_loss = trial["result"][keywords["vl"]] + testing_loss = trial["result"][keywords["tl"]] + loss = validation_loss * val_f + testing_loss * test_f + + if testing_loss < threshold and validation_loss < threshold: + good_result = True + else: + good_result = False + + # Was this a ok run? 
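+    # (a trial is marked as "good" only when hyperopt reports status == "ok" and both
+    #  losses are below `threshold`; trials above the threshold also get their loss
+    #  bumped an order of magnitude so they sort last in the plots)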
+ if trial["result"]["status"] != "ok": + ok = False + else: + ok = good_result + + if not good_result: + # Set bad results to be an order of magnitude above everything else + loss = threshold * 10 + + dict_out[keywords["good"]] = ok + dict_out[keywords["loss"]] = loss + dict_out[keywords["vl"]] = validation_loss + dict_out[keywords["tl"]] = testing_loss + + +def filter_by_string(data_dict, filter_string): + """ + Receives a data_dict (a parsed trial) and a filter string, returns True if the trial passes the filter + + filter string must have the format: key[]string + where [] can be any of !=, =, >, < + """ + match = regex_not_op.findall(filter_string) + if len(match) < 2: + raise ValueError("Filter str is not correct: {0}".format(filter_string)) + filter_key = match[0] + filter_val = match[1] + + if filter_key in keywords.keys(): + filter_key = keywords[filter_key] + + val_check = data_dict[filter_key] + if val_check is None: # NaN means it does not apply + return True + + operator = regex_op.findall(filter_string)[0] + + if operator == "=": + operator = "==" + operators = ["!=", "==", ">", "<"] + if operator not in operators: + raise NotImplementedError("Filter string not valid, operator not recognized {0}".format(filter_string)) + + # This I know it is not ok: + if isinstance(val_check, str) and isinstance(filter_val, str): + check_str = '"{0}"{1}"{2}"' + else: + check_str = "{0}{1}{2}" + try: + return eval(check_str.format(val_check, operator, filter_val)) + except: + return False + + +# General parsing +def trial_parser(trial, filter_me=None, **kwargs): + """ + Trials are very convoluted object, very branched inside + The goal of this function is to separate said branching so we can create hierarchies + """ + # Is this a true trial + if trial["state"] != 2: + return None + + data_dict = {} + # Parse all information in easy to use dictionaries + optimizer_parse(trial, data_dict) + architecture_parser(trial, data_dict) + stopping_parser(trial, data_dict) + stat_parser(trial, data_dict, **kwargs) + + # If a filter is set, check whether the trial pass the filter + if filter_me: + for filter_str in filter_me: + if not filter_by_string(data_dict, filter_str): + return None + + return data_dict + + +def filter_trials(input_trials, **kwargs): + """ + Applies the search algorithm to all the different trials to find the true best model + """ + # First step is to fill in two different + failed_trials = [] + ok_trials = [] + + full_info = [] + for tid, trial in enumerate(input_trials): + trial_dict = trial_parser(trial, **kwargs) + if trial_dict is None: + continue + trial_dict[keywords["id"]] = tid + full_info.append(trial_dict) + + return full_info + + +def order_axis(df, bestdf, key): + """ + Helper function for ordering the axis and make sure the best is always first + """ + best_x_lst = bestdf.get(key).tolist() + ordering = set(df.get(key).tolist()) + ordering.remove(best_x_lst[0]) + ordering_true = best_x_lst + list(ordering) + best_x = np.array([str(best_x_lst[0])]) + return ordering_true, best_x + + +def plot_scans(df, best_df, outfile): + """ + This function plots all trials in a nice multiplot + """ + # Some printouts + print("All trials:") + print(df) + + print("Best setup:") + with pd.option_context("display.max_rows", None, "display.max_columns", None): + print(best_df) + + # Append extra keys for the number of possible layers + for i in range(keywords["max_layers"]): + plotting_keys.append(("layer_{0}".format(i + 1), 4)) + + # Create a grid of plots + nplots = len(plotting_keys) + 
_, axs = plt.subplots(4, nplots // 4, sharey=True, figsize=(30, 30)) + + # Set the quantity we will be plotting in the y axis + loss_k = keywords["loss"] + + for ax, key_tuple in zip(axs.reshape(-1), plotting_keys): + key = key_tuple[0] + mode = key_tuple[1] + + if mode == 0 or mode == 2: # normal scatter plot + ax = sns.scatterplot(x=key, y=loss_k, data=df, ax=ax) + best_x = best_df.get(key) + if mode == 2: + ax.set_xscale("log") + ax.set_xlim(key_tuple[2]) + elif mode == 1: + ordering_true, best_x = order_axis(df, best_df, key=key) + ax = sns.violinplot(x=key, y=loss_k, data=df, ax=ax, palette="Set2", cut=0.0, order=ordering_true) + ax = sns.stripplot(x=key, y=loss_k, data=df, ax=ax, color="gray", order=ordering_true, alpha=0.4) + elif mode == 4: + # The nodes per layer is a special case because we want to know to + # which total number of layers they correspond + ordering_true, _ = order_axis(df, best_df, key=keywords["nl"]) + best_x = best_df.get(key) + for l in ordering_true: + plot_data = df[df[keywords["nl"]] == l] + label = "layers = {0}".format(l) + ax = sns.scatterplot(x=key, y=loss_k, data=plot_data, ax=ax, label=label) + ax.legend() + + # Finally plot the "best" one, which will be first + ax = sns.scatterplot(x=best_x, y=best_df.get(loss_k), ax=ax, color="orange", marker="s") + ax.set_ylabel("Loss") + ax.set_xlabel(key) + + plt.savefig(f"{outfile}", bbox_inches="tight") + + +def draw_histogram(dataframe, key, plotable="loss", columns=2): + values = set(dataframe[key]) + nv = len(values) + rows = np.ceil(nv / columns) + + for i, value in enumerate(values): + plt.subplot(rows, columns, i + 1) + data = dataframe[dataframe[key] == value] + plt.hist(data[plotable], 20) + plt.xlabel(value) + plt.show() + + +def print_stats( + dataframe, + stats_keys=["optimizer", "nl", "initializer", "activation"], + n_combinations=2, + verbose=True, + return_early=False, + how_many_to_kill=2, + remove_by_key=False, + save_n_best=10, + **kwargs, +): + """ + Print statistics on failure rates, averages and so + """ + + # Ok, at this point I have a dataframe with all the content from the json. 
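+    # (NB: everything below the bare `return` further down in this function is currently
+    #  unreachable exploratory code, and `set_trace` is called without importing it from pdb)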
+ # if filters or changes on the threshold for the loss are active, the dataframe would + # already be filtered and all information ready + + def read_key(key): + if key in keywords.keys(): + key_name = keywords[key] + else: + key_name = key + return key_name + + def kclean(l): + return [read_key(i) for i in l] + + def remove_key(dictionary, key): + new_d = dictionary.copy() + new_d.pop(key) + return new_d + + def gen_key_info(full_dataframe, keys): + key_info = {} + for key_name in keys: + all_possible_values = full_dataframe[key_name] + remove_nans = filter(lambda x: x == x, all_possible_values) + remove_duplicates = sorted(list(set(remove_nans))) + if remove_duplicates: + key_info[key_name] = remove_duplicates + return key_info + + def get_slice(full_dataframe, key, value): + return full_dataframe[full_dataframe[key] == value] + + from HyperAlgorithm import HyperAlgorithm + + keys_lv_0 = kclean(["optimizer", "number_of_layers"]) + keys_lv_1 = kclean(["epochs", "stopping_patience", "activation"]) + keys_lv_2 = kclean(["p_mul", "p_ini", "learning_rate"]) + + keys_for_removal = keys_lv_0 + keys_lv_1 + + key_info_full = gen_key_info(dataframe, keys_for_removal) + hypalg = HyperAlgorithm(key_info=key_info_full) + trim_dataframe = hypalg.remove_failing(dataframe) + key_info_trim = gen_key_info(trim_dataframe, keys_for_removal) + hypalg = HyperAlgorithm(key_info=key_info_trim) + + decision_tree = {} + + key_0 = keys_lv_0[0] + options_0 = key_info_trim[key_0] + for val in options_0: + print("For {0}".format(val)) + full_slice = get_slice(trim_dataframe, key_0, val) + trim_slice = hypalg.remove_failing(full_slice) + for key in keys_for_removal[1:]: + trim_slice = hypalg.remove_failing(trim_slice, key=key, fail_threshold=0.0) + # At this point I've already removed the worst worst ones, now we can create the "decision_tree" + tree_key_info = gen_key_info(trim_slice, keys_lv_0[0:]) + decision_tree[val] = (tree_key_info, trim_slice) + + print("\n\n") + verbose = False + + final_decision_tree = {} + key_1 = keys_lv_0[1] + for val_0, info in decision_tree.items(): + keys = info[0] + f_slice = info[1] + options_1 = keys[key_1] + if verbose: + print("For {0}, accepted options are: {1}={2}".format(val_0, key_1, options_1)) + final_decision_tree[val_0] = {} + for val in options_1: + print(val) + trim_slice = get_slice(f_slice, key_1, val) + for key_name in keys_for_removal[2:]: + trim_slice = hypalg.remove_failing(trim_slice, key=key_name, fail_threshold=0.0, verbose=False) + # Save the final slice for this branch of the tree + final_decision_tree[val_0][val] = trim_slice + + # Now for each branch let's look at the rewards + final_answer = {} + for val_0, item_0 in final_decision_tree.items(): + if verbose: + print("For {0}".format(val_0)) + final_answer[val_0] = {} + for val_1, item_1 in item_0.items(): + if verbose: + print("\n\nFor {0}".format(val_1)) + # Now remove half of the combinations but only looking at the level_1 keys + key_info_lv1 = gen_key_info(item_1, keys_lv_1) + falg = HyperAlgorithm(key_info=key_info_lv1) + n1 = len(key_info_lv1) + trim_slice = falg.remove_failing(item_1, n_combinations=n1, fail_threshold="median", verbose=False) + # Now do the same with the level 2, remove the worst half + key_info_lv2 = gen_key_info(trim_slice, keys_lv_2) + falg = HyperAlgorithm(key_info=key_info_lv2) + n2 = len(key_info_lv2) + final_slice = falg.remove_failing(trim_slice, n_combinations=n2, fail_threshold="median", verbose=False) + # Now print the ranges + print_ranges = keys_lv_1 + keys_lv_2 + 
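+            # print the surviving parameter ranges for this (optimizer, number_of_layers) branch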
print( + """ +For {0} = {1} + {2} = {3}""".format( + key_0, val_0, key_1, val_1 + ) + ) + palg = HyperAlgorithm(key_info=gen_key_info(final_slice, print_ranges)) + palg.print_ranges(final_slice, dataframe_original=item_1) + + return + + # key_priority_raw = ["architecture", "optimizer", "number_of_layers", "epochs", "activation"] + key_priority_raw = [ + "optimizer", + "epochs", + "number_of_layers", + "learning_rate", + "activation", + "layer_1", + "layer_2", + "layer_3", + ] + key_priority = [read_key(i) for i in key_priority_raw] + # Now, for each key we need to get all the possible options + + key_info = gen_key_info(dataframe) + # Now that we have all the options we need to go in priority order + + # 1. Remove the failing entries (i.e., reward == failing reward) + hypalg = HyperAlgorithm(key_info=key_info) + trim_dataframe = hypalg.remove_failing(dataframe) + # 1.b Regenerate key_info + key_info = gen_key_info(trim_dataframe) + + # 2. Now fix the optimizer and continue + key_0 = key_priority[0] # optimizer + options_0 = key_info[key_0] + key_info_0 = remove_key(key_info, key_0) + hypalg = HyperAlgorithm(key_info=key_info_0) + for val in options_0: + print("\n\n") + print(val) + f_slice = trim_dataframe[trim_dataframe[key_0] == val] + # First remove all failing keys + trim_slice = hypalg.remove_failing(f_slice) + for key_name in key_priority[1:]: + print(">") + print(key_name) + trim_slice = hypalg.remove_failing(trim_slice, key=key_name, fail_threshold=0.0) + hypalg.print_ranges(trim_slice, dataframe_original=f_slice) + + set_trace() + + # Create a key_info dict with one entry removed for each key + key_infos = [] + last_k = key_info + for key_name in key_priority: + last_k = remove_key(last_k, key_name) + key_infos.append(last_k) + + key_0 = key_priority[0] + options_0 = key_info[key_0] + slices_0 = [] + key_info_0 = {key_priority[1]: key_info[key_priority[1]]} + hyperalg_0 = HyperAlgorithm(key_info=key_info) + hyperalg_0.check_wrapper_weaklings_killer(dataframe, 1) + for option_0 in options_0: + slices_0.append(dataframe[dataframe[key_0] == option_0]) + for slice_0 in slices_0: + # check the rewards, if some are particularly bad, kill them + _ = hyperalg_0.check_wrapper_weaklings_killer(slice_0, 1) + # entonce smatamos primero los epochs por ejemplo + # con los epochs malos eliminados, miramos el numbeor de layers + # y eliminamos los dos peores (nos quedamos con la mitad) + # luego para cada uno nos quedamos con el set de numero de nodos que tiene mejor pinta + # y asi + set_trace() + + def print_me(string): + if verbose: + print(string) + + def kill_val(key, name): + dataframe.loc[dataframe[key] == name, keywords["good"]] = False + + def process_slice(df_slice): + """ Function to process a slice into a dictionary with interesting stats """ + md = {} + good = df_slice[df_slice[keywords["good"]]] + md["n_total"] = len(df_slice) + md["n_good"] = len(good) + md["n_failed"] = md["n_total"] - md["n_good"] + if md["n_total"] == 0: + f_rate = None + else: + f_rate = md["n_failed"] / md["n_total"] * 100 + md["f_rate"] = f_rate + + # Compute for each the best loss (among the good ones) and the std dev + good_losses = good[keywords["loss"]] + md["best_loss"] = good_losses.min() + md["std"] = good_losses.std() + return md + + ############################### + + # Parse the keys into a dict of { 'keyname' : [possible, values] } + key_info = {} + for key in stats_keys: + if key in keywords.keys(): + key_name = keywords[key] + else: + key_name = key + possible_values = list( + set( + filter( 
lambda x: x == x, dataframe[key_name] ) + ) + ) + key_info[key_name] = possible_values + + from n3fit.hyper_optimization.HyperAlgorithm import HyperAlgorithm + + hyperalg = HyperAlgorithm( + key_info=key_info, fail_threshold=65, how_many_to_kill=how_many_to_kill, remove_by_key=remove_by_key + ) + + # First run over all keys and print_me the information for them to see the general picture + + # Perform a genocide for every reward below the threshold for 1 combination and then 2 the same with combinations of 2 + new_dataframe = hyperalg.check_wrapper_weaklings_killer(dataframe, 1) + new_dataframe = hyperalg.check_wrapper_weaklings_killer(new_dataframe, 2) + set_trace() + new_dataframe = hyperalg.check_wrapper_weaklings_killer(new_dataframe, 1) + new_dataframe = hyperalg.check_wrapper_weaklings_killer(new_dataframe, 2) + set_trace() + new_dataframe = hyperalg.check_wrapper_weaklings_killer(new_dataframe, 3) + + # for i in range(1, n_combinations): + # new_dataframe = hyperalg.check_wrapper(new_dataframe, i, kill = True) + # For the last one, only the n_best surviv + new_dataframe = hyperalg.check_wrapper(new_dataframe, n_combinations, kill=True, save_n_best=save_n_best) + + # Finally print the survivors + hyperalg.print_ranges(new_dataframe, dataframe_original=dataframe) + + +# Wrapper +def generate_scan_report_from_file( + replica_path, json_name="tries.json", include_failures=False, histogram=None, stats=False, **kwargs +): + """ + Read json file and generate a the corresponding scan report + """ + filename = "{0}/{1}".format(replica_path, json_name) + + # Open the file and reads it as a json + with open(filename, "r") as jlist: + input_trials = json.load(jlist) + + # Read all trials and create a list of dictionaries + trials_dictionary = filter_trials(input_trials, **kwargs) + + # Make the list of dictionaries into a dataframe + dataframe_raw = pd.DataFrame(trials_dictionary) + + # If we just want to print stats, do that and exit + if stats: + print_stats(dataframe_raw, **kwargs) + return + + # Now select whether we want only good trials or rather all of them + if include_failures: + dataframe = dataframe_raw + else: + dataframe = dataframe_raw[dataframe_raw["good"]] + + # Now select the best one + best_idx = dataframe.loss.idxmin() + best_trial = dataframe.ix[best_idx] + # Select it in this a bit more complicatted way for the plotting code to work ???? 
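+    # (note: `DataFrame.ix` is deprecated and removed in recent pandas; since `idxmin`
+    #  returns an index label, the equivalent call would be `dataframe.loc[best_idx]`)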
+ best_trial = dataframe[dataframe[keywords["id"]] == best_trial.get(keywords["id"])] + + if histogram: + draw_histogram(dataframe, histogram) + + # Plot the plot + fileout = f"{replica_path}/scan.pdf" + plot_scans(dataframe, best_trial, fileout) + print("Plot saved at {0}".format(fileout)) + +def main(): + from argparse import ArgumentParser + import glob + + parser = ArgumentParser() + parser.add_argument("folder", help = "Fit folder") + parser.add_argument("-v", "--val_multiplier", help = 'Fraction to weight the validation loss with (test_multiplier = 1-val_multiplier)', + type = float, default = 0.5) + parser.add_argument("-if", "--include_failures", help = 'Flag to include failed runs in the plots', action = 'store_true') + parser.add_argument("-t", "--threshold", help = 'Value of the loss function from which to consider a run to have failed', + type = float, default = 1e3) + parser.add_argument("-dh", "--histogram", help = 'Write a histogram for a given key') + parser.add_argument("-f", "--filter", help = "Add the filter key=value to the dataframe", nargs = '+') + parser.add_argument("-s", "--stats", help = "Print stats on the run instead of plotting", action = 'store_true') + parser.add_argument("-k", "--stats_keys", help = "When using toghether with -s, chooses which keys to look at", + default = ['optimizer', 'number_of_layers', 'initializer', 'layer_1', 'layer_2', 'layer_3', 'layer_4', 'learning_rate', 'epochs'], + nargs = '+') + parser.add_argument("--ncombinations", help = "When using together with -s, chooses how many keys to combine", type = int, default = 2) + parser.add_argument("--n_to_remove", help = "Number of worst values (by reward) to remove", type = int, default = 2) + parser.add_argument("--remove_by_key", help = "Remove by key instead of by the worst of all combinations", action = "store_true") + parser.add_argument("--save_n_best", help = "Prints the n best models as a range", type = int, default = 10) + args = parser.parse_args() + + # Search for all json for all replicas + search_str = "{0}/nnfit/replica_*/tries.json".format(args.folder) + all_json = glob.glob(search_str) + + for json_path in all_json: + replica_path = os.path.dirname(json_path) + generate_scan_report_from_file( + replica_path, + val_f=args.val_multiplier, + include_failures=args.include_failures, + threshold=args.threshold, + histogram=args.histogram, + filter_me=args.filter, + stats=args.stats, + stats_keys=args.stats_keys, + n_combinations=args.ncombinations, + how_many_to_kill=args.n_to_remove, + remove_by_key=args.remove_by_key, + save_n_best=args.save_n_best, + ) + + +if __name__ == "__main__": + main() diff --git a/n3fit/src/n3fit/io/__init__.py b/n3fit/src/n3fit/io/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/n3fit/src/n3fit/io/reader.py b/n3fit/src/n3fit/io/reader.py new file mode 100644 index 0000000000..a155e7959c --- /dev/null +++ b/n3fit/src/n3fit/io/reader.py @@ -0,0 +1,318 @@ +""" + Library of function for reading NNPDF objects +""" +import hashlib +import copy +import numpy as np + +from NNPDF import RandomGenerator +from validphys.core import ExperimentSpec as vp_Exp +from validphys.core import DataSetSpec as vp_Dataset + + +def make_tr_val_mask(datasets, exp_name, seed): + """ + Generates the training and validation masks for a given experiment + + # Arguments: + - `datasets`: list of datasets corresponding to one experiment + - `exp_name`: name of the experiment + - `seed` : seed for the random tr/vl split + + # Return + - `trmask`: numpy boolean 
array + - `vlmask`: not trmask + """ + # Set the seed for the experiment + nameseed = int(hashlib.sha256(exp_name.encode()).hexdigest(), 16) % 10 ** 8 + nameseed += seed + np.random.seed(nameseed) + + trmask_partial = [] + vlmask_partial = [] + for dataset_dict in datasets: + ndata = dataset_dict["ndata"] + frac = dataset_dict["frac"] + trmax = int(frac * ndata) + mask = np.concatenate( + [np.ones(trmax, dtype=np.bool), np.zeros(ndata - trmax, dtype=np.bool)] + ) + np.random.shuffle(mask) + trmask_partial.append(mask) + vlmask_partial.append(mask == False) + trmask = np.concatenate(trmask_partial) + vlmask = np.concatenate(vlmask_partial) + + return trmask, vlmask + + +def fk_parser(fk, is_hadronic=False): + """ + # Arguments: + - `fk`: fktable object + + # Return: + - `dict_out`: dictionary with all information about the fktable + - 'xgrid' + - 'nx' + - 'ndata' + - 'basis' + - 'fktable' + """ + + # Dimensional information + nonzero = fk.GetNonZero() + ndata = fk.GetNData() + nx = fk.GetTx() + + # Basis of active flavours + nbasis = nonzero + basis = fk.get_flmap() + + # Read the xgrid and the fktable to numpy arrays + xgrid_flat = fk.get_xgrid() + fktable_flat = fk.get_sigma() + + # target shape + if is_hadronic: + nxsqrt = int(np.sqrt(nx)) + shape_out = (ndata, nbasis, nxsqrt, nxsqrt) + xgrid = xgrid_flat.reshape(1, nxsqrt) + else: + shape_out = (ndata, nbasis, nx) + xgrid = xgrid_flat.reshape(1, nx) + + # remove padding from the fktable (if any) + l = len(fktable_flat) + # get the padding + pad_position = fk.GetDSz() + pad_size = fk.GetDSz() - nx * nonzero + # remove the padding + if pad_size > 0: + mask = np.ones(l, dtype=bool) + for i in range(1, int(l / pad_position) + 1): + marker = pad_position * i + mask[marker - pad_size : marker] = False + fktable_array = fktable_flat[mask] + else: + fktable_array = fktable_flat + # reshape + fktable = fktable_array.reshape(shape_out) + + dict_out = { + "ndata": ndata, + "nbasis": nbasis, + "nonzero": nbasis, + "basis": basis, + "nx": nx, + "xgrid": xgrid, + "fktable": fktable, + } + return dict_out + + +def common_data_reader_dataset(dataset_c, dataset_spec): + """ + Import fktable, common data and experimental data for the given data_name + + # Arguments: + - `dataset_c`: c representation of the dataset object + - `dataset_spec`: python representation of the dataset object + + #Returns: + - `[dataset_dict]`: a (len 1 list of) dictionary with: + - dataset: full dataset object from which all data has been extracted + - hadronic: is hadronic? 
+ - operation: operation to perform on the fktables below + - fktables : a list of dictionaries with the following items + - ndata: number of data points + - nonzero/nbasis: number of PDF entries which are non zero + - basis: array containing the information of the non-zero PDF entries + - nx: number of points in the xgrid (1-D), i.e., for hadronic the total number is nx * nx + - xgrid: grid of x points + - fktable: 3/4-D array of shape (ndata, nonzero, nx, (nx)) + + instead of the dictionary object that model_gen needs + """ + how_many = dataset_c.GetNSigma() + dict_fktables = [] + for i in range(how_many): + fktable = dataset_c.GetFK(i) + dict_fktables.append(fk_parser(fktable, dataset_c.IsHadronic())) + + dataset_dict = { + "fktables": dict_fktables, + "hadronic": dataset_c.IsHadronic(), + "operation": dataset_spec.op, + "name": dataset_c.GetSetName(), + "frac": dataset_spec.frac, + "ndata": dataset_c.GetNData(), + } + + return [dataset_dict] + + +def common_data_reader_experiment(experiment_c, experiment_spec): + """ + Wrapper around the experiments. Loop over all datasets in an experiment, + calls common_data_reader on them and return a list with the content. + + # Arguments: + - `experiment_c`: c representation of the experiment object + - `experiment_spec`: python representation of the experiment object + + # Returns: + - `[parsed_datasets]`: a list of dictionaries output from `common_data_reader_dataset` + """ + parsed_datasets = [] + for dataset_c, dataset_spec in zip( + experiment_c.DataSets(), experiment_spec.datasets + ): + parsed_datasets += common_data_reader_dataset(dataset_c, dataset_spec) + return parsed_datasets + + +def common_data_reader(spec, t0pdfset, replica_seeds=None, trval_seeds=None): + """ + Wrapper to read the information from validphys object + This function receives either a validphyis experiment or dataset objects + + # Returns: + - `all_dict_out`: a dictionary containing all the information of the experiment/dataset + for training, validation and experimental + 'datasets' : list of the datasets contained in the experiment + 'name' : name of the experiment + 'expdata_true' : non-replica data + 'invcovmat_true' : inverse of the covmat (non-replica) + + 'trmask' : mask for the training data + 'invcovmat' : inverse of the covmat for the training data + 'ndata' : number of datapoints for the training data + 'expdata' : experimental data (replica'd) for training + + 'vlmask' : (same as above for validation) + 'invcovmat_vl' : (same as above for validation) + 'ndata_vl' : (same as above for validation) + 'expdata_vl' : (same as above for validation) + + 'positivity' : bool - is this a positivity set? 
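+
+    # Example:
+        A minimal sketch of how the output is consumed (`exp_spec` stands for a validphys
+        experiment spec and `t0set` for a t0 PDF set object, neither defined in this module):
+
+            exp_dicts = common_data_reader(exp_spec, t0set, replica_seeds=[1], trval_seeds=[0])
+            ndata_tr = exp_dicts[0]['ndata']   # number of training points of the first replica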
+ + + """ + if replica_seeds is None: + replica_seeds = [] + if trval_seeds is None: + trval_seeds = [0] + # TODO + # This whole thing would be much more clear / streamlined if + # - The c experiment/dataset object had all the required information for the fit + # (i.e., there is a swig conversion for everything, right now missing the operator) + # - The python object stored the load within the spec when doing spec.load() + # this way it would not be necessary to load twice + # - The python object had all necessary information (same as point 1 but inverted) + + spec_c = spec.load() + ndata = spec_c.GetNData() + expdata_true = spec_c.get_cv().reshape(1, ndata) + spec_c.SetT0(t0pdfset) + base_mcseed = int(hashlib.sha256(str(spec).encode()).hexdigest(), 16) % 10 ** 8 + + if replica_seeds: + all_expdatas = [] + else: + all_expdatas = [expdata_true.reshape(ndata)] + + for replica_seed in replica_seeds: + spec_replica = copy.deepcopy(spec) + spec_replica_c = spec_replica.load() # I might need the t0 set here as well + + # Replica generation + mcseed = base_mcseed + replica_seed + RandomGenerator.InitRNG(0, mcseed) + spec_replica_c.MakeReplica() + all_expdatas.append(spec_replica_c.get_cv()) + + # spec_c = spec_replica_c + + if isinstance(spec, vp_Exp): + datasets = common_data_reader_experiment(spec_c, spec) + elif isinstance(spec, vp_Dataset): + datasets = common_data_reader_dataset(spec_c, spec) + else: + raise ValueError( + "reader.py: common_data_reader, didn't understood spec type: {0}".format( + type(spec) + ) + ) + + exp_name = spec.name + covmat = spec_c.get_covmat() + + # Now it is time to build the masks for the training validation split + all_dict_out = [] + for expdata, trval_seed in zip(all_expdatas, trval_seeds): + tr_mask, vl_mask = make_tr_val_mask(datasets, exp_name, seed=trval_seed) + + covmat_tr = covmat[tr_mask].T[tr_mask] + ndata_tr = np.count_nonzero(tr_mask) + expdata_tr = expdata[tr_mask].reshape(1, ndata_tr) + invcovmat_tr = np.linalg.inv(covmat_tr) + + covmat_vl = covmat[vl_mask].T[vl_mask] + ndata_vl = np.count_nonzero(vl_mask) + expdata_vl = expdata[vl_mask].reshape(1, ndata_vl) + invcovmat_vl = np.linalg.inv(covmat_vl) + + dict_out = { + "datasets": datasets, + "name": exp_name, + "expdata_true": expdata_true, + "invcovmat_true": np.linalg.inv(covmat), + "trmask": tr_mask, + "invcovmat": invcovmat_tr, + "ndata": ndata_tr, + "expdata": expdata_tr, + "vlmask": vl_mask, + "invcovmat_vl": invcovmat_vl, + "ndata_vl": ndata_vl, + "expdata_vl": expdata_vl, + "positivity": False, + } + all_dict_out.append(dict_out) + + return all_dict_out + + +def positivity_reader(pos_spec): + """ + Specific reader for positivity sets + """ + pos_c = pos_spec.load() + ndata = pos_c.GetNData() + + parsed_set = [fk_parser(pos_c, pos_c.IsHadronic())] + + pos_sets = [ + { + "fktables": parsed_set, + "hadronic": pos_c.IsHadronic(), + "operation": "NULL", + "name": pos_spec.name, + "frac": 1.0, + "ndata": ndata, + } + ] + + positivity_factor = pos_spec.poslambda + + dict_out = { + "datasets": pos_sets, + "trmask": np.ones(ndata, dtype=np.bool), + "name": pos_spec.name, + "expdata": np.zeros((1, ndata)), + "ndata": ndata, + "positivity": True, + "lambda": positivity_factor, + } + + return dict_out diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py new file mode 100644 index 0000000000..860c832515 --- /dev/null +++ b/n3fit/src/n3fit/io/writer.py @@ -0,0 +1,127 @@ +""" + Module containing functions dedicated to the write down of the output of n3fit + + The goal is to generate 
the same folder/file structure as the old nnfit code + so previously active scripts can still work. +""" + +import numpy as np +from reportengine.compat import yaml + +def evln2lha(evln): + # evln Basis {"PHT","SNG","GLU","VAL","V03","V08","V15","V24","V35","T03","T08","T15","T24","T35"}; + # lha Basis: {"TBAR","BBAR","CBAR","SBAR","UBAR","DBAR","GLUON","D","U","S","C","B","T","PHT"} + lha = np.zeros(evln.shape) + lha[13] = evln[0] + + lha[6] = evln[2] + + lha[8] = ( 10*evln[1] + + 30*evln[9] + 10*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + + 10*evln[3] + 30*evln[4] + 10*evln[5] + 5*evln[6] + 3*evln[7] + 2*evln[8] ) / 120 + + lha[4] = ( 10*evln[1] + + 30*evln[9] + 10*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + - 10*evln[3] - 30*evln[4] - 10*evln[5] - 5*evln[6] - 3*evln[7] - 2*evln[8] ) / 120 + + lha[7] = ( 10*evln[1] + - 30*evln[9] + 10*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + + 10*evln[3] - 30*evln[4] + 10*evln[5] + 5*evln[6] + 3*evln[7] + 2*evln[8] ) / 120 + + lha[5] = ( 10*evln[1] + - 30*evln[9] + 10*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + - 10*evln[3] + 30*evln[4] - 10*evln[5] - 5*evln[6] - 3*evln[7] - 2*evln[8] ) / 120 + + lha[9] = ( 10*evln[1] + - 20*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + + 10*evln[3] - 20*evln[5] + 5*evln[6] + 3*evln[7] + 2*evln[8] ) / 120 + + lha[3] = ( 10*evln[1] + - 20*evln[10] + 5*evln[11] + 3*evln[12] + 2*evln[13] + - 10*evln[3] + 20*evln[5] - 5*evln[6] - 3*evln[7] - 2*evln[8] ) / 120 + + lha[10] = ( 10*evln[1] + - 15*evln[11] + 3*evln[12] + 2*evln[13] + + 10*evln[3] - 15*evln[6] + 3*evln[7] + 2*evln[8] ) / 120 + + lha[2] = ( 10*evln[1] + - 15*evln[11] + 3*evln[12] + 2*evln[13] + - 10*evln[3] + 15*evln[6] - 3*evln[7] - 2*evln[8] ) / 120 + + lha[11] = ( 5*evln[1] + - 6*evln[12] + evln[13] + + 5*evln[3] - 6*evln[7] + evln[8] ) / 60 + + lha[1] = ( 5*evln[1] + - 6*evln[12] + evln[13] + - 5*evln[3] + 6*evln[7] - evln[8] ) / 60 + + lha[12] = ( evln[1] + - evln[13] + + evln[3] - evln[8] ) / 12 + + lha[0] = ( evln[1] + - evln[13] + - evln[3] + evln[8] ) / 12 + return lha + + +def storefit(pdf_function, replica, replica_path, fitname, q20, nite, erf_vl, erf_tr, chi2, + arc_lengths, all_chi2_lines, all_preproc_lines, pos_state): + """ + One-trick function which generates all output in the NNPDF format so that all other scripts can still be used. 
+ + # Argument: + - `pdf_function`: PDF function (usually the .predict method of a model) that receives as input a point in x and returns an array of 14 flavours + - `replica` : the replica index + - `replica_path` : path for this replica + - `fitname` : name of the fit + - `q20` : q_0^2 + - `nite` : number of epochs before stopping + - `erf_vl` : chi2 for the validation + - `erf_tr` : chi2 for the training + - `chi2` : chi2 for the exp (unreplica'd data) + - `arc_lengths` : list with the arclengths of all replicas + - `all_chi2_lines`: a big log with the chi2 every 100 epochs + - `all_preproc_lines`: (None) + - `pos_state`: positivity passing flag + """ + # build exportgrid + xgrid = np.array([1.00000000000000e-09, 1.29708482343957e-09, 1.68242903474257e-09, 2.18225315420583e-09, 2.83056741739819e-09, 3.67148597892941e-09, 4.76222862935315e-09, 6.17701427376180e-09, 8.01211109898438e-09, 1.03923870607245e-08, 1.34798064073805e-08, 1.74844503691778e-08, 2.26788118881103e-08, 2.94163370300835e-08, 3.81554746595878e-08, 4.94908707232129e-08, 6.41938295708371e-08, 8.32647951986859e-08, 1.08001422993829e-07, 1.40086873081130e-07, 1.81704331793772e-07, 2.35685551545377e-07, 3.05703512595323e-07, 3.96522309841747e-07, 5.14321257236570e-07, 6.67115245136676e-07, 8.65299922973143e-07, 1.12235875241487e-06, 1.45577995547683e-06, 1.88824560514613e-06, 2.44917352454946e-06, 3.17671650028717e-06, 4.12035415232797e-06, 5.34425265752090e-06, 6.93161897806315e-06, 8.99034258238145e-06, 1.16603030112258e-05, 1.51228312288769e-05, 1.96129529349212e-05, 2.54352207134502e-05, 3.29841683435992e-05, 4.27707053972016e-05, 5.54561248105849e-05, 7.18958313632514e-05, 9.31954227979614e-05, 1.20782367731330e-04, 1.56497209466554e-04, 2.02708936328495e-04, 2.62459799331951e-04, 3.39645244168985e-04, 4.39234443000422e-04, 5.67535660104533e-04, 7.32507615725537e-04, 9.44112105452451e-04, 1.21469317686978e-03, 1.55935306118224e-03, 1.99627451141338e-03, 2.54691493736552e-03, 3.23597510213126e-03, 4.09103436509565e-03, 5.14175977083962e-03, 6.41865096062317e-03, 7.95137940306351e-03, 9.76689999624100e-03, 1.18876139251364e-02, 1.43298947643919e-02, 1.71032279460271e-02, 2.02100733925079e-02, 2.36463971369542e-02, 2.74026915728357e-02, 3.14652506132444e-02, 3.58174829282429e-02, 4.04411060163317e-02, 4.53171343973807e-02, 5.04266347950069e-02, 5.57512610084339e-02, 6.12736019390519e-02, 6.69773829498255e-02, 7.28475589986517e-02, 7.88703322292727e-02, 8.50331197801452e-02, 9.13244910278679e-02, 9.77340879783772e-02, 1.04252538208639e-01, 1.10871366547237e-01, 1.17582909372878e-01, 1.24380233801599e-01, 1.31257062945031e-01, 1.38207707707289e-01, 1.45227005135651e-01, 1.52310263065985e-01, 1.59453210652156e-01, 1.66651954293987e-01, 1.73902938455578e-01, 1.81202910873333e-01, 1.88548891679097e-01, 1.95938145999193e-01, 2.03368159629765e-01, 2.10836617429103e-01, 2.18341384106561e-01, 2.25880487124065e-01, 2.33452101459503e-01, 2.41054536011681e-01, 2.48686221452762e-01, 2.56345699358723e-01, 2.64031612468684e-01, 2.71742695942783e-01, 2.79477769504149e-01, 2.87235730364833e-01, 2.95015546847664e-01, 3.02816252626866e-01, 3.10636941519503e-01, 3.18476762768082e-01, 3.26334916761672e-01, 3.34210651149156e-01, 3.42103257303627e-01, 3.50012067101685e-01, 3.57936449985571e-01, 3.65875810279643e-01, 3.73829584735962e-01, 3.81797240286494e-01, 3.89778271981947e-01, 3.97772201099286e-01, 4.05778573402340e-01, 4.13796957540671e-01, 4.21826943574548e-01, 4.29868141614175e-01, 4.37920180563205e-01, 4.45982706956990e-01, 
4.54055383887562e-01, 4.62137890007651e-01, 4.70229918607142e-01, 4.78331176755675e-01, 4.86441384506059e-01, 4.94560274153348e-01, 5.02687589545177e-01, 5.10823085439086e-01, 5.18966526903235e-01, 5.27117688756998e-01, 5.35276355048428e-01, 5.43442318565661e-01, 5.51615380379768e-01, 5.59795349416641e-01, 5.67982042055800e-01, 5.76175281754088e-01, 5.84374898692498e-01, 5.92580729444440e-01, 6.00792616663950e-01, 6.09010408792398e-01, 6.17233959782450e-01, 6.25463128838069e-01, 6.33697780169485e-01, 6.41937782762089e-01, 6.50183010158361e-01, 6.58433340251944e-01, 6.66688655093089e-01, 6.74948840704708e-01, 6.83213786908386e-01, 6.91483387159697e-01, 6.99757538392251e-01, 7.08036140869916e-01, 7.16319098046733e-01, 7.24606316434025e-01, 7.32897705474271e-01, 7.41193177421404e-01, 7.49492647227008e-01, 7.57796032432224e-01, 7.66103253064927e-01, 7.74414231541921e-01, 7.82728892575836e-01, 7.91047163086478e-01, 7.99368972116378e-01, 8.07694250750291e-01, 8.16022932038457e-01, 8.24354950923382e-01, 8.32690244169987e-01, 8.41028750298844e-01, 8.49370409522600e-01, 8.57715163684985e-01, 8.66062956202683e-01, 8.74413732009721e-01, 8.82767437504206e-01, 8.91124020497459e-01, 8.99483430165226e-01, 9.07845617001021e-01, 9.16210532771399e-01, 9.24578130473112e-01, 9.32948364292029e-01, 9.41321189563734e-01, 9.49696562735755e-01, 9.58074441331298e-01, 9.66454783914439e-01, 9.74837550056705e-01, 9.83222700304978e-01, 9.91610196150662e-01, 1.00000000000000e+00]).reshape(-1, 1) + + result = pdf_function(xgrid) + lha = evln2lha(result.T).T + + data = { + 'replica' : replica, + 'q20' : q20, + 'xgrid': xgrid.T.tolist()[0], + 'labels': ['TBAR', 'BBAR', 'CBAR', 'SBAR', 'UBAR', 'DBAR', 'GLUON', 'D', 'U', 'S', 'C', 'B', 'T', 'PHT'], + 'pdfgrid': lha.tolist() + } + + with open(f'{replica_path}/{fitname}.exportgrid', 'w') as fs: + yaml.dump(data, fs) + + + # create empty files to make postfit happy + emptyfiles = ['chi2exps.log', f'{fitname}.params', f'{fitname}.sumrules'] + for fs in emptyfiles: + open(f'{replica_path}/{fs}', 'a').close() + + # Write chi2exp + with open(f'{replica_path}/chi2exps.log', 'w') as fs: + for line in all_chi2_lines: + fs.write(line) + + # Write preproc information + with open(f'{replica_path}/{fitname}.preproc','w') as fs: + for line in all_preproc_lines: + fs.write(line) + + # create info file + arc_strings = [str(i) for i in arc_lengths] + arc_line = ' '.join(arc_strings) + with open(f'{replica_path}/{fitname}.fitinfo', 'w') as fs: + fs.write(f'{nite} {erf_vl} {erf_tr} {chi2} {pos_state}\n') + fs.write(arc_line) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py new file mode 100644 index 0000000000..2e57cadcd4 --- /dev/null +++ b/n3fit/src/n3fit/layers/DIS.py @@ -0,0 +1,42 @@ +import numpy as np +from n3fit.layers.Observable import Observable + + +class DIS(Observable): + """ + The DIS class receives a list of active flavours and a fktable + and prepares a layer that performs the convolution of said fktable with + the incoming pdf. + """ + + def gen_basis(self, basis): + """ + Receives a list of active flavours and generates a boolean mask + + # Arguments: + - `basis`: list of active flavours + """ + if basis is not None: + self.basis = np.zeros(self.nfl, dtype=bool) + for i in basis: + self.basis[i] = True + else: + self.basis = np.ones(self.nfl, dtype=bool) + + def call(self, pdf_in): + """ + Thiss function perform the fktable \otimes pdf convolution. 
+ + Firs pass the input PDF through a mask to remove the unactive flavour, then transpose the PDF + to have everything in the correct order and finally perform a tensorproduct contracting both pdf indices. + + # Arguments: + - `pdf_in`: rank 2 tensor (xgrid, flavours) + # Returns: + - `result`: rank 1 tensor (ndata) + """ + pdf_masked = self.boolean_mask(pdf_in, self.basis, axis=1) + + pdfT = self.transpose(pdf_masked) + result = self.tensor_product(self.fktable, pdfT, axes=2) + return result diff --git a/n3fit/src/n3fit/layers/DY.py b/n3fit/src/n3fit/layers/DY.py new file mode 100644 index 0000000000..d9461d987f --- /dev/null +++ b/n3fit/src/n3fit/layers/DY.py @@ -0,0 +1,68 @@ +import numpy as np +from n3fit.layers.Observable import Observable + + +class DY(Observable): + """ + Computes the convolution of two PDFs (the same one twice) and one fktable + """ + + def gen_basis(self, basis): + """ + Receives an array of combinations and make it into an array of 2-tuples + + # Arguments: + - `basis`: array of combinations in the form + [i1,j1,i2,j2,i3,j3...] + """ + basis_to_pairs = basis.reshape(-1, 2) + self.basis = basis_to_pairs + self.basis_size = len(self.basis) + + def call(self, pdf_in): + """ + Thiss function perform the fktable \otimes pdf \otimes pdf convolution. + + First uses the basis of active combinations to generate a luminosity tensor + with only some flavours active. + + The concatenate function returns a rank-3 tensor (combination_index, xgrid, xgrid) + which can in turn be contracted with the rank-4 fktable. + + # Arguments: + - `pdf_in`: rank 2 tensor (xgrid, flavours) + + # Returns: + - `result`: rank 1 tensor (ndata) + """ + # This is a convoluted way of applying a mask, but it is faster + # mask-version below + lumi_fun = [] + pdfT = self.transpose(pdf_in) + + for i, j in self.basis: + lumi_fun.append(self.tensor_product(pdfT[i], pdfT[j], axes=0)) + + pdf_X_pdf = self.concatenate(lumi_fun, axis=0, target_shape=(self.basis_size, self.xgrid_size, self.xgrid_size)) + + result = self.tensor_product(self.fktable, pdf_X_pdf, axes=3) + return result + + +# Another example on how to performt the DY convolution +# this code is equivalent to the previos one, with a slightly greater cost +class DY_mask(Observable): + def gen_basis(self, basis): + if basis is not None: + self.basis = np.zeros((self.nfl, self.nfl), dtype=bool) + for i, j in basis.reshape(-1, 2): + self.basis[i, j] = True + else: + self.basis = np.ones((self.nfl, self.nfl), dtype=bool) + + def call(self, pdf_in): + lfun = self.tensor_product(pdf_in, pdf_in, axes=0) + lfunT = self.permute_dimensions(lfun, (3, 1, 2, 0)) + x = self.boolean_mask(lfunT, self.basis, axis=0) + result = self.tensor_product(self.fktable, x, axes=3) + return result diff --git a/n3fit/src/n3fit/layers/MSR_Normalization.py b/n3fit/src/n3fit/layers/MSR_Normalization.py new file mode 100644 index 0000000000..666beca627 --- /dev/null +++ b/n3fit/src/n3fit/layers/MSR_Normalization.py @@ -0,0 +1,41 @@ +from n3fit.backends import MetaLayer + + +class MSR_Normalization(MetaLayer): + """ + Applies the normalisation so that the PDF output fullfills the sum rules + """ + + def __init__(self, output_dim=14, **kwargs): + self.output_dim = output_dim + self.one = self.tensor_ones((1, 1)) + self.three = 3 * self.tensor_ones((1, 1)) + super(MSR_Normalization, self).__init__(**kwargs, name="normalizer") + + def compute_output_shape(self, input_shape): + return (self.output_dim,) + + def call(self, x): + """ + Receives as input a tensor with the value of the 
MSR for each PDF + and returns a rank-1 tensor with the normalization factor A_i of each flavour + """ + pdf_sr = self.concatenate( + [ + self.one, # photon + self.one, # sigma + (self.one - x[0]) / x[1], # g + self.three / x[2], # v + self.one / x[3], # v3 + self.three / x[4], # v8 + self.three / x[2], # v15 + self.three / x[2], # v24 + self.three / x[2], # v35 + self.one, # t3 + self.one, # t8 + self.one, # t15 (c-) + self.one, # t24 + self.one, # t35 + ] + ) + return pdf_sr diff --git a/n3fit/src/n3fit/layers/Mask.py b/n3fit/src/n3fit/layers/Mask.py new file mode 100644 index 0000000000..b789c75d37 --- /dev/null +++ b/n3fit/src/n3fit/layers/Mask.py @@ -0,0 +1,32 @@ +import numpy as np +from n3fit.backends import MetaLayer + + +class Mask(MetaLayer): + """ + This layers applies a boolean mask to a rank-1 input tensor. + + Its most common use is the training/validation split. + The mask admit a multiplier for all outputs + + # Arguments: + - `bool_mask`: numpy array with the boolean mask to be applied + - `c`: constant multiplier for every output + """ + + def __init__(self, bool_mask, c=1.0, **kwargs): + self.output_dim = np.count_nonzero(bool_mask) + self.mask = bool_mask + self.c = c + super(MetaLayer, self).__init__(**kwargs) + + def build(self, input_shape): + initializer = MetaLayer.init_constant(value=self.c) + self.kernel = self.builder_helper("mask", (1,), initializer, trainable=False) + super(Mask, self).build(input_shape) + + def compute_output_shape(self, input_shape): + return (self.output_dim, None) + + def call(self, prediction_in): + return self.boolean_mask(self.kernel * prediction_in, self.mask) diff --git a/n3fit/src/n3fit/layers/Observable.py b/n3fit/src/n3fit/layers/Observable.py new file mode 100644 index 0000000000..919db50785 --- /dev/null +++ b/n3fit/src/n3fit/layers/Observable.py @@ -0,0 +1,45 @@ +from n3fit.backends import MetaLayer +from abc import abstractmethod, ABC + + +class Observable(MetaLayer, ABC): + """ + This class is the parent of the DIS and DY convolutions. + All backend-dependent code necessary for the convolutions + is (must be) concentrated here + + The methods gen_basis and call must be overriden by the observables + where + - gen_basis: it is called by the initializer and generates the mask between + fktables and pdfs + - call: this is what does the actual operation + + # Arguments: + - `output_dim`: output dimension of the observable (can be read from the fktable) + - `fktable`: fktable + - `basis`: list of active combinations of flavours + - `nfl` : total number of flavours in the pdf (default = 14) + """ + + def __init__(self, output_dim, fktable, basis=None, nfl=14, **kwargs): + self.nfl = nfl + + self.output_dim = output_dim + self.fktable = self.np_to_tensor(fktable) + self.xgrid_size = self.fktable.shape[-1] + + self.gen_basis(basis) + + super(MetaLayer, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return (self.output_dim, None) + + # Overridables + @abstractmethod + def gen_basis(self, basis): + pass + + @abstractmethod + def call(self, pdf_in): + pass diff --git a/n3fit/src/n3fit/layers/Preprocessing.py b/n3fit/src/n3fit/layers/Preprocessing.py new file mode 100644 index 0000000000..77f6ccedf0 --- /dev/null +++ b/n3fit/src/n3fit/layers/Preprocessing.py @@ -0,0 +1,74 @@ +from n3fit.backends import MetaLayer +from n3fit.backends import constraints + +BASIS_SIZE = 8 + + +class Preprocessing(MetaLayer): + """ + Applies preprocessing to the PDF. 
+ + This layer generates a factor (1-x)^beta*x^(1-alpha) where both beta and alpha + are parameters to be fitted. + + Both beta and alpha are initialized uniformly within the ranges allowed in the runcard and + then they are only allowed to move between those two values (with a hard wall in each side) + + # Arguments: + - `output_dim`: size of the fitbasis + - `trainable`: bool, whether beta and alpha should be fixed + - `flav_info`: dictionary containg the information with the limits of alpha and beta + - `seed`: seed for the initializer of the random alpha, beta values + """ + + def __init__(self, output_dim=BASIS_SIZE, trainable=True, flav_info=None, seed=0, **kwargs): + self.output_dim = output_dim + self.trainable = trainable + self.flav_info = flav_info + self.seed = seed + super(MetaLayer, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return (input_shape[0], self.output_dim) + + def build(self, input_shape): + self.kernel = [] + for i in range(BASIS_SIZE * 2): + fl = int(i / 2) + f = self.flav_info[fl]["fl"] + if i % 2: + name = "beta_{0}".format(f) + kind = "largex" + else: + name = "alfa_{0}".format(f) + kind = "smallx" + + limits = self.flav_info[fl][kind] + lm = limits[0] + lp = limits[1] + init = MetaLayer.select_initializer("random_uniform", minval=lm, maxval=lp, seed=self.seed + i) + weight_constraint = constraints.MinMaxWeight(lm, lp) + + nw = self.builder_helper( + name=name, kernel_shape=(1,), initializer=init, trainable=self.trainable, constraint=weight_constraint + ) + + self.kernel.append(nw) + + super(Preprocessing, self).build(input_shape) + + def call(self, x): + pdf_raw = self.concatenate( + [ + x ** (1 - self.kernel[0][0]) * (1 - x) ** self.kernel[1][0], # sigma + x ** (1 - self.kernel[2][0]) * (1 - x) ** self.kernel[3][0], # g + x ** (1 - self.kernel[4][0]) * (1 - x) ** self.kernel[5][0], # v + x ** (1 - self.kernel[6][0]) * (1 - x) ** self.kernel[7][0], # v3 + x ** (1 - self.kernel[8][0]) * (1 - x) ** self.kernel[9][0], # v8 + x ** (1 - self.kernel[10][0]) * (1 - x) ** self.kernel[11][0], # t3 = sigma + x ** (1 - self.kernel[12][0]) * (1 - x) ** self.kernel[13][0], # t8 = sigma + x ** (1 - self.kernel[14][0]) * (1 - x) ** self.kernel[15][0], # t15 c- + ], + axis=1, + ) + return pdf_raw diff --git a/n3fit/src/n3fit/layers/Rotation.py b/n3fit/src/n3fit/layers/Rotation.py new file mode 100644 index 0000000000..0526dd40e9 --- /dev/null +++ b/n3fit/src/n3fit/layers/Rotation.py @@ -0,0 +1,36 @@ +from n3fit.backends import MetaLayer + + +class Rotation(MetaLayer): + """ + Applies a transformation from the dimension-8 fit basis + to the dimension-14 evolution basis + """ + + def __init__(self, output_dim=14, **kwargs): + self.output_dim = output_dim + super(MetaLayer, self).__init__(**kwargs, name="evolution") + + def compute_output_shape(self, input_shape): + return (input_shape[0], self.output_dim) + + def call(self, x_raw): + x = self.transpose(x_raw) + pdf_raw_list = [ + 0 * x[0], # photon + x[0], # sigma + x[1], # g + x[2], # v + x[3], # v3 + x[4], # v8 + x[2], # v15 + x[2], # v24 + x[2], # v35 + x[5], # t3 + x[6], # t8 + x[0] - 4 * x[7], # t15 (c-) + x[0], # t24 + x[0], # t35 + ] + pdf = self.concatenate(pdf_raw_list, target_shape=(self.output_dim, -1)) + return self.transpose(pdf) diff --git a/n3fit/src/n3fit/layers/__init__.py b/n3fit/src/n3fit/layers/__init__.py new file mode 100644 index 0000000000..4d87049cec --- /dev/null +++ b/n3fit/src/n3fit/layers/__init__.py @@ -0,0 +1,8 @@ +from n3fit.layers.Preprocessing import 
Preprocessing +from n3fit.layers.Rotation import Rotation +from n3fit.layers.x_operations import xIntegrator, xDivide, xMultiply +from n3fit.layers.MSR_Normalization import MSR_Normalization +from n3fit.layers.DIS import DIS +from n3fit.layers.DY import DY +from n3fit.layers.Mask import Mask +from n3fit.backends import MetaLayer diff --git a/n3fit/src/n3fit/layers/x_operations.py b/n3fit/src/n3fit/layers/x_operations.py new file mode 100644 index 0000000000..d22e8aa5e0 --- /dev/null +++ b/n3fit/src/n3fit/layers/x_operations.py @@ -0,0 +1,105 @@ +""" + This module contains layers acting on the x-grid input of the NN + + The three operations included are: + - `xDivide` + - `xMultiply` + - `xIntegrator` + + The names are self-describing. The only subtlety is that they do not act equally + for all flavours. The choice of flavours on which to act in a different way is given + as an input argument. +""" + +from n3fit.backends import MetaLayer + + +class xDivide(MetaLayer): + """ + Divide the pdf by x + By default: dimension = 8 dividing the entries corresponding to v, v3, v8 + + # Arguments: + - `output_dim`: dimension of the pdf + - `div_list`: list of indices to be divided by X (by default [2,3,4]; [v, v3, v8] + """ + + def __init__(self, output_dim=8, div_list=None, **kwargs): + if div_list is None: + div_list = [2, 3, 4] + self.output_dim = output_dim + self.div_list = div_list + super(MetaLayer, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return (input_shape[0], self.output_dim) + + def call(self, x): + out_array = [] + one = self.tensor_ones_like(x) + for i in range(self.output_dim): + if i in self.div_list: + res = one / x + else: + res = one + out_array.append(res) + out_tensor = self.concatenate(out_array, axis=1) + return out_tensor + + +class xMultiply(MetaLayer): + """ + Multiply the pdf by x + By default: dimension = 8 multiply all entries but v, v3, v8 + + # Arguments: + - `output_dim`: dimension of the pdf + - `not_mul_list`: list of indices *not* to multiply by X (by default [2,3,4]; [v, v3, v8] + """ + + def __init__(self, output_dim=8, not_mul_list=None, **kwargs): + if not_mul_list is None: + not_mul_list = [2, 3, 4] + self.output_dim = output_dim + self.not_mul_list = not_mul_list + super(MetaLayer, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return (input_shape[0], self.output_dim) + + def call(self, x): + out_array = [] + one = self.tensor_ones_like(x) + for i in range(self.output_dim): + if i in self.not_mul_list: + res = one + else: + res = one * x + out_array.append(res) + out_tensor = self.concatenate(out_array, axis=1) + return out_tensor + + +class xIntegrator(MetaLayer): + """ + This layer performs a sum of the input layer/tensor on the first axis + """ + + def __init__(self, grid_weights, output_dim=8, **kwargs): + grid_weights_tensor = self.np_to_tensor(grid_weights) + # Open up the grid weights + self.grid_weights = self.many_replication(grid_weights_tensor, replications=8, axis=1) + self.output_dim = output_dim + super(MetaLayer, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return (self.output_dim,) + + def call(self, x): + """ + Receives as input a rank-2 tensor `x` (xpoints, flavours) + and returns a summation on the first index (xpoints) of tensor `x`, weighted by the + weights of the grid (in the most common case, 1/grid_points) + """ + xx = x * self.grid_weights + return self.sum(xx, axis=0) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py 
new file mode 100644 index 0000000000..cff984ff32 --- /dev/null +++ b/n3fit/src/n3fit/model_gen.py @@ -0,0 +1,249 @@ +""" + Library of functions which generate the NN objects + + Contains: + # observable_generator: + Generates the output layers + # pdfNN_layer_generator: + Generates the PDF NN layer to be fitted +""" +from n3fit.layers import DIS +from n3fit.layers import DY +from n3fit.layers import Mask +from n3fit.layers import Preprocessing, Rotation + +from n3fit.backends import operations +from n3fit.backends import losses +from n3fit.backends import MetaLayer +from n3fit.backends import base_layer_selector, concatenate, Lambda + + +def observable_generator( + spec_dict, positivity_initial=None, positivity_multiplier=1.05, verbose=False, positivity_steps=300 +): + """ + This function generates the observable generator. + It loads the fktable in the convolution layer (hadronic or DIS) and prepares the loss function. + If the dataset is a positivity dataset acts in consequence. + + # Arguments: + - `spec_dict`: a dictionary-like object containing the information of the observable + - `positivity_initial`: if given, set this number as the positivity multiplier for epoch 1 + - `positivity_multiplier`: how much the positivity increases every 100 steps + - `positivity_steps`: if positivity_initial is not given, computes the initial by assuming we want, + after 100**positivity_steps epochs, to have the lambda of the runcard + + # Returns: a dictionary with: + - `inputs`: input layer + - `output`: output layer (unmasked) + - `loss` : loss function (unmasked) + - `output_tr`: output layer (training) + - `loss_tr` : loss function (training) + - `output_vl`: output layer (validation) + - `loss_vl` : loss function (validation) + """ + spec_name = spec_dict["name"] + model_inputs = [] + model_obs = [] + + # The first step is to generate an observable layer for each given datasets + for dataset_dict in spec_dict["datasets"]: + dataname = dataset_dict["name"] + + # Choose which kind of observable are we dealing with, since this will define the convolution + if dataset_dict["hadronic"]: + Obs_Layer = DY + else: + Obs_Layer = DIS + + # Define the operation (if any) + op = operations.c_to_py_fun(dataset_dict["operation"], name=dataname) + # Retrieve the list of fktables + fktable_list = dataset_dict["fktables"] + + obs_list = [] + input_list = [] + # Now generate an input and output layer for each sub_obs of the dataset + for i, fktable_dict in enumerate(fktable_list): + # Input layer + input_layer = operations.numpy_to_input(fktable_dict["xgrid"].T) + input_list.append(input_layer) + # Output layer + obs_layer = Obs_Layer( + output_dim=fktable_dict["ndata"], + fktable=fktable_dict["fktable"], + basis=fktable_dict["basis"], + name="{0}_{1}".format(dataname, i), + input_shape=(14,), + ) + obs_list.append((input_layer, obs_layer)) + + # Add the inputs to the lists of inputs of the model + model_inputs += input_list + # Append the combination of observable and the operation to be applied to the list of model_obs + model_obs.append((op, obs_list)) + + def final_obs(pdf_layer): + all_ops = [] + for operation, observables in model_obs: + all_obs = [] + for i_layer, o_layer in observables: + all_obs.append(o_layer(pdf_layer(i_layer))) + all_ops.append(operation(all_obs)) + if len(all_ops) == 1: + return all_ops[0] + else: + return concatenate(all_ops, axis=0, name=spec_name + "_full") + + if spec_dict["positivity"]: + max_lambda = spec_dict["lambda"] + if not positivity_initial: + initial_lambda = max_lambda 
/ pow(positivity_multiplier, positivity_steps) + else: + initial_lambda = positivity_initial + out_tr_mask = Mask(bool_mask=spec_dict["trmask"], c=initial_lambda, name=spec_name) + + def out_tr(pdf_layer): + return out_tr_mask(final_obs(pdf_layer)) + + layer_info = {"inputs": model_inputs, "output_tr": out_tr, "loss_tr": losses.l_positivity()} + return layer_info + + # Now generate the mask layers to be applied to training and validation + out_tr_mask = Mask(bool_mask=spec_dict["trmask"], name=spec_name) + out_vl_mask = Mask(bool_mask=spec_dict["vlmask"], name=spec_name + "_val") + + def out_tr(pdf_layer): + return out_tr_mask(final_obs(pdf_layer)) + + def out_vl(pdf_layer): + return out_vl_mask(final_obs(pdf_layer)) + + # Generate the loss function as usual + invcovmat = spec_dict["invcovmat_true"] + loss = losses.l_invcovmat(invcovmat) + + invcovmat_tr = spec_dict["invcovmat"] + loss_tr = losses.l_invcovmat(invcovmat_tr) + + invcovmat_vl = spec_dict["invcovmat_vl"] + loss_vl = losses.l_invcovmat(invcovmat_vl) + + layer_info = { + "inputs": model_inputs, + "output": final_obs, + "loss": loss, + "output_tr": out_tr, + "loss_tr": loss_tr, + "output_vl": out_vl, + "loss_vl": loss_vl, + } + + return layer_info + + +def pdfNN_layer_generator( + inp=2, + nodes=None, + activations=None, + initializer_name="glorot_normal", + layer_type="dense", + flav_info=None, + out=14, + seed=None, + dropout=0.0, +): + """ + Generates the PDF model which takes as input a point in x (from 0 to 1) and outputs a basis of 14 PDFs. + It generates the preprocessing of the x into a set (x, log(x)), the arbitrary NN to fit the form of the PDF + and the preprocessing factors. + + # Arguments: + - `inp`: dimension of the xgrid. If inp=2, turns the x point into a (x, log(x)) pair + - `nodes' : list of the number of nodes per layer of the PDF NN. Default: [15,8] + - `activation`: list of activation functions to apply to each layer. Default: ["tanh", "linear"] + if the number of activation function does not match the number of layers, it will add + copies of the first activation function found + - `initializer_name`: selects the initializer of the weights of the NN. Default: glorot_normal + - `layer_type`: selects the type of architecture of the NN. 
Default: dense + - `flav_info`: dictionary containing the information about each PDF (basis dictionary in the runcard) + to be used by Preprocessing + - `out`: number of output flavours of the model + - `seed`: seed to initialize the NN + - `dropout`: rate of dropout layer by layer + + # Returns: + - `layer_pdf`: a function which, upon calling it with a tensor, will connect all PDF layers and output a tensor of size (batch_size, `out`) + - `dict_layers`: a dictionary containing some of the intermediate layers (necessary for debugging and to compute intermediate quantities) + + """ + if nodes is None: + nodes = [15, 8] + if activations is None: + activations = ["tanh", "linear"] + # Safety check + ln = len(nodes) + la = len(activations) + if ln != la: + raise ValueError("Number of activation functions does not match number of layers @ model_gen.py") + + # If dropout == 0, don't add dropout layer + if dropout == 0.0: + dropme = -99 + else: # Add the dropout to the second to last layer + dropme = ln - 2 + + # Layer generation + dl = [] + pre = inp + for i, (units, activation) in enumerate(zip(nodes, activations)): + if i == dropme and i != 0: + dl.append(base_layer_selector("dropout", rate=dropout)) + + init = MetaLayer.select_initializer(initializer_name, seed=seed + i) + + arguments = {"kernel_initializer": init, "units": int(units), "activation": activation, "input_shape": (pre,)} + # Arguments that are not used by a given layer are just dropped + tmpl = base_layer_selector(layer_type, **arguments) + + dl.append(tmpl) + pre = int(units) + + if inp == 2: + add_log = Lambda(lambda x: concatenate([x, operations.op_log(x)], axis=1)) + + def dense_me(x): + if inp == 1: + curr_fun = dl[0](x) + else: + curr_fun = dl[0](add_log(x)) + + for dense_layer in dl[1:]: + curr_fun = dense_layer(curr_fun) + return curr_fun + + # Preprocessing layer (will be multiplied to the last of the denses) + preproseed = seed + ln + layer_preproc = Preprocessing(input_shape=(1,), name="pdf_prepro", flav_info=flav_info, seed=preproseed) + + # Evolution layer + layer_evln = Rotation(input_shape=(pre,), output_dim=out) + + # Generate multiplier layer + # multiplier = Multiply(name = "MultiplyPreproc") + + # Apply preprocessing + def layer_fitbasis(x): + return operations.op_multiply([dense_me(x), layer_preproc(x)]) + + # Rotation layer, changes from the 8-basis to the 14-basis + def layer_pdf(x): + return layer_evln(layer_fitbasis(x)) + + dict_layers = { + "denses": dense_me, # The set of the N dense layers + "preprocessing": layer_preproc, # The layer that applies preprocessing + "fitbasis": layer_fitbasis, # Applied preprocessing to the output of the denses + } + + return layer_pdf, dict_layers diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py new file mode 100644 index 0000000000..552319e9c9 --- /dev/null +++ b/n3fit/src/n3fit/msr.py @@ -0,0 +1,161 @@ +""" + The constraint module include functions to impose the momentum sum rules on the PDFs +""" +import numpy as np + +from n3fit.layers import xDivide, MSR_Normalization, xIntegrator, xMultiply +from n3fit.backends import operations +from n3fit.backends import MetaModel + + +def gen_integration_input(nx): + """ + Generates a np.array (shaped (nx,1)) of nx elements where the + nx/2 first elements are a logspace between 0 and 0.1 + and the rest a linspace from 0.1 to 0 + """ + lognx = int(nx / 2) + linnx = int(nx - lognx) + xgrid_log = np.logspace(-9, -1, lognx + 1) + xgrid_lin = np.linspace(0.1, 1, linnx) + xgrid = np.concatenate([xgrid_log[:-1], 
xgrid_lin]).reshape(nx, 1) + + spacing = [0.0] + for i in range(1, nx): + spacing.append(np.abs(xgrid[i - 1] - xgrid[i])) + spacing.append(0.0) + + weights = [] + for i in range(nx): + weights.append((spacing[i] + spacing[i + 1]) / 2.0) + weights_array = np.array(weights).reshape(nx, 1) + + return xgrid, weights_array + + +def msr_impose(fit_layer, final_pdf_layer, verbose=False): + """ + This function receives: + - fit_layer: the 8-basis layer of PDF which we fit + - final_layer: the 14-basis which is fed to the fktable + It uses pdf_fit to compute the sum rule and returns a modified version of the final_pdf layer + with a normalisation by which the sum rule is imposed + """ + # 1. Generate the fake input which will be used to integrate + nx = int(2e3) + xgrid, weights_array = gen_integration_input(nx) + + # 2. Prepare the pdf for integration + # for that we need to multiply several flavours with 1/x + division_by_x = xDivide() + + def pdf_integrand(x): + res = operations.op_multiply([division_by_x(x), fit_layer(x)]) + return res + + # 3. Now create the integration layer (the layer that will simply integrate, given some weight + integrator = xIntegrator(weights_array, input_shape=(nx,)) + + # 4. Now create the normalization by selecting the right integrations + normalizer = MSR_Normalization(input_shape=(8,)) + + # 5. Make the xgrid numpy array into a backend input layer so it can be given + xgrid_input = operations.numpy_to_input(xgrid) + normalization = normalizer(integrator(pdf_integrand(xgrid_input))) + + def ultimate_pdf(x): + return operations.op_multiply_dim([final_pdf_layer(x), normalization]) + + if verbose: + # only_int = integrator(pdf_integrand(xgrid_input)) + # modelito = MetaModel(xgrid_input, only_int) + # result = modelito.predict(x = None, steps = 1) + + print(" > > Generating model for the inyection layer which imposes MSR") + check_integration(ultimate_pdf, xgrid_input) + + # Save a reference to xgrid in ultimate_pdf, very useful for debugging + ultimate_pdf.ref_xgrid = xgrid_input + + return ultimate_pdf, xgrid_input + + +def check_integration(ultimate_pdf, integration_input): + """ + Naive integrator for quick checks. 
+ Receives the final PDF layer, computes the 4 MSR and prints out the result + + Called only (for debugging purposes) by msr_impose above + """ + nx = int(1e4) + xgrid, weights_array = gen_integration_input(nx) + xgrid_input = operations.numpy_to_input(xgrid) + + multiplier = xDivide(output_dim=14, div_list=range(3, 9)) + + def pdf_integrand(x): + res = operations.op_multiply([multiplier(x), ultimate_pdf(x)]) + return res + + modelito = MetaModel([xgrid_input, integration_input], pdf_integrand(xgrid_input)) + modelito.summary() + result = modelito.predict(x=None, steps=1) + + result_weighted = result * weights_array + result_integrated = np.sum(result_weighted, axis=0) + + msr = result_integrated[1] + result_integrated[2] + v = result_integrated[3] + v3 = result_integrated[4] + v8 = result_integrated[5] + print( + """ + > > > Int from 0 to 1 of: + x*g(x) + x*sigma(x) = {0} + v = {1} + v3 = {2} + v8 = {3}""".format( + msr, v, v3, v8 + ) + ) + + +def compute_arclength(fitbasis_layer, verbose=False): + """ + Given the layer with the fit basis computes the arc length + """ + nx = int(1e6) + xgrid, weights_array = gen_integration_input(nx) + multiplier = xMultiply() + xgrid_input = operations.numpy_to_input(xgrid) + eps = xgrid[0] / 2.0 + xgrid_input_prime = operations.numpy_to_input(xgrid - eps) + + def integrand(xgrid_input, xgrid_input_prime): + f1 = fitbasis_layer(xgrid_input) + f2 = fitbasis_layer(xgrid_input_prime) + pdfprime = operations.op_subtract([f2, f1]) + + res = operations.op_multiply([multiplier(xgrid_input), pdfprime]) + return res + + modelito = MetaModel([xgrid_input, xgrid_input_prime], integrand(xgrid_input, xgrid_input_prime)) + derivatives = modelito.predict(x=None, steps=1) / eps + derivatives_sq = pow(derivatives, 2) + f_of_x = np.sqrt(1.0 + derivatives_sq) + arc_lengths = np.sum(f_of_x * weights_array, axis=0) + + if verbose: + print( + """ + > > > Arc length: + sigma = {0} + g = {1} + v = {2} + v3 = {3} + v8 = {4}""".format( + *arc_lengths[:5] + ) + ) + + return arc_lengths diff --git a/n3fit/src/n3fit/n3fit.py b/n3fit/src/n3fit/n3fit.py new file mode 100755 index 0000000000..b9b8a12a2a --- /dev/null +++ b/n3fit/src/n3fit/n3fit.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +""" + n3fit - performs fit using ml external frameworks +""" + +import sys +import re +import shutil +import pathlib +import logging +import warnings +import argparse + +from validphys.app import App +from validphys.config import Environment, Config +from validphys.config import EnvironmentError_, ConfigError +from reportengine import colors +from reportengine.compat import yaml + + +N3FIT_FIXED_CONFIG = dict( + use_cuts = 'internal', + use_t0 = True, + actions_ = ['datacuts::theory fit'] +) + +N3FIT_PROVIDERS = ["n3fit.fit"] + +log = logging.getLogger(__name__) + +RUNCARD_COPY_FILENAME = "filter.yml" + + +class N3FitError(Exception): + """Exception raised when n3fit cannot succeed and knows why""" + + pass + + +class N3FitEnvironment(Environment): + """Container for information to be filled at run time""" + + def init_output(self): + # check file exists, is a file, has extension. + if not self.config_yml.exists(): + raise N3FitError("Invalid runcard. File not found.") + else: + if not self.config_yml.is_file(): + raise N3FitError("Invalid runcard. 
Must be a file.") + + # check if results folder exists + self.output_path = pathlib.Path(self.output_path).absolute() + if not self.output_path.exists(): + if not re.fullmatch(r"[\w.\-]+", self.output_path.name): + raise N3FitError("Invalid output folder name. Must be alphanumeric.") + try: + self.output_path.mkdir() + pathlib.Path(self.output_path / "nnfit").mkdir() + except OSError as e: + raise EnvironmentError_(e) from e + + try: + shutil.copy2(self.config_yml, self.output_path / RUNCARD_COPY_FILENAME) + except shutil.SameFileError: + pass + + # create output folder for the fit + self.replica_path = self.output_path / "nnfit" + for replica in self.replica: + path = self.replica_path / "replica_{0}".format(replica) + log.info("Creating replica output folder in {0}".format(path)) + try: + path.mkdir(exist_ok=True) + except OSError as e: + raise EnvironmentError_(e) from e + + @classmethod + def ns_dump_description(cls): + return { + "replica": "The MC replica number", + "replica_path": "The replica output path", + "output_path": "The runcard name", + "hyperopt": "The hyperopt flag", + "create_test_card": "Create test card flag", + **super().ns_dump_description(), + } + + +class N3FitConfig(Config): + """Specialization for yaml parsing""" + + @classmethod + def from_yaml(cls, o, *args, **kwargs): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", yaml.error.MantissaNoDotYAML1_1Warning) + # We need to specify the older version 1.1 to support the + # older configuration files, which liked to use on/off for + # booleans. + # The floating point parsing yields warnings everywhere, which + # we suppress. + file_content = yaml.safe_load(o, version="1.1") + except yaml.error.YAMLError as e: + raise ConfigError(f"Failed to parse yaml file: {e}") + if not isinstance(file_content, dict): + raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.") + file_content.update(N3FIT_FIXED_CONFIG) + return cls(file_content, *args, **kwargs) + + +class N3FitApp(App): + """The class which parsers and performs the fit""" + + environment_class = N3FitEnvironment + config_class = N3FitConfig + + def __init__(self): + super(N3FitApp, self).__init__(name="n3fit", providers=N3FIT_PROVIDERS) + + @property + def argparser(self): + parser = super().argparser + parser.add_argument("-o", "--output", help="Output folder and name of the fit", default=None) + parser.add_argument("--hyperopt", help="Enable hyperopt scan", default=None, type=int) + parser.add_argument( + "--create_test_card", + help="Generates a new runcard where some datasets are only used for a test set (only hyperopt)", + action="store_true", + ) + + def check_positive(value): + ivalue = int(value) + if ivalue <= 0: + raise argparse.ArgumentTypeError("%s is an invalid positive int value." 
% value) + return ivalue + + parser.add_argument("replica", help="MC replica number", type=check_positive) + parser.add_argument( + "-r", "--replica_range", help="End of the range of replicas to compute", type=check_positive + ) + return parser + + def get_commandline_arguments(self, cmdline=None): + args = super().get_commandline_arguments(cmdline) + if args["output"] is None: + args["output"] = pathlib.Path(args["config_yml"]).stem + return args + + def run(self): + try: + self.environment.config_yml = pathlib.Path(self.args["config_yml"]).absolute() + replica = self.args["replica"] + if self.args["replica_range"]: + replicas = list(range(replica, self.args["replica_range"] + 1)) + else: + replicas = [replica] + self.environment.replica = replicas + self.environment.hyperopt = self.args["hyperopt"] + if self.args["create_test_card"]: + self.environment.create_test_card = self.environment.config_yml + else: + self.environment.create_test_card = None + super().run() + except N3FitError as e: + log.error(f"Error in n3fit:\n{e}") + sys.exit(1) + except Exception as e: + log.critical(f"Bug in n3fit occurred. Please report it.") + print(colors.color_exception(e.__class__, e, e.__traceback__), file=sys.stderr) + sys.exit(1) + + +def main(): + a = N3FitApp() + a.main() + + +if __name__ == "__main__": + main() diff --git a/n3fit/src/n3fit/readme.md b/n3fit/src/n3fit/readme.md new file mode 100644 index 0000000000..523986ce92 --- /dev/null +++ b/n3fit/src/n3fit/readme.md @@ -0,0 +1,280 @@ +Ciao everyone, + +As promised, here is the "beta" version of the new fitting code for everyone to have fun with. We put all our code in the folder `n3fit` (i.e., everything below refers to this folder). + +# n3fit.py + +This version is what we promised to make public in Varenna next August; in the end we decided to also push the hyperopt code, since it was already ready, rather than release a stripped-down version for Easter. + + +## Look before you leap + +There are a few things to note before running this code. + +### Differences with NNPDF + +- Preprocessing: + +This is perhaps one of the biggest differences with respect to NNPDF. The preprocessing variables are being fitted (within the parameters given in the runcard). There is room for new studies on this. + +- The network: + +We have constructed the network as a single net, i.e., all layers are fully connected. + +- Positivity and stopping: + +Positivity is enforced and replicas are only accepted once they pass some positivity criteria. This is related to stopping in that the stop only occurs once the validation loss no longer improves **_and_** the positivity criteria are fulfilled. + + +- Seeds: + +Everything is seeded at the level of the experiments, i.e., if you were to change the order of two experiments the results _should_ still be the same. + +### Performance, hardware usage + +- CPU usage: + +As said in Amsterdam, the code is quite fast and able to do a fit for a single replica in the order of 10 minutes / 1 hour for DIS / Global. However, the usage of the available CPU power is not the most efficient. + +By default tensorflow will try to spawn as many processes as your computer has logical threads (x2); however, most of them sit idle most of the time. This is due to the amount of data we have available: it is simply not enough to give something to do to every core (_if_ you have a large enough number of powerful enough cores). + +We have limited the code to 8-16 threads; a sketch of how such a limit is typically set is shown below.
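A minimal sketch of how such a limit is usually imposed with Keras on top of tensorflow 1.x (the actual code in `backend/keras_backend/debug_mode.py` is not reproduced here, and the thread numbers are only placeholders):

```python
import tensorflow as tf
from keras import backend as K

# Cap the number of threads tensorflow is allowed to use
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=8,  # threads used within a single op
    inter_op_parallelism_threads=8,  # threads used to run independent ops concurrently
)
K.set_session(tf.Session(config=session_conf))
```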
The limitation is currently done in lines 34-35 of `backend/keras_backend/debug_mode.py`. + +- Memory usage: + +In our tests we find a memory usage of 2 GB for DIS and 16 GB for Global. I would personally recommend being very generous with the memory requirements, since otherwise you might end up swapping, which will greatly hinder performance. + +**Warning:** If you were to run this code in a cluster without very good sandboxing, you will still be running on 8 processors even if you only asked for 2. Therefore, if you are going to ask for a small number of processors, I would recommend changing those settings in the code as well. You don't want to make your IT department sad. + +_WIP:_ Eventually the resource management will be part of the runcard, but we are studying how to best reduce the impact (especially on memory), so this is still work in progress; run it at your own risk. + +### Caveats, things to be wary of + +The gradient descent algorithms are quite powerful, and the possibility of running much faster means you can run much deeper networks. However, as a wise man once said, _with great power comes great responsibility_. In particular, in our tests we found **it is not difficult to run into overfitting issues**. + +Overfitting happens most easily in DIS, where the number of data points is small enough that it is not difficult to generate a network with a number of parameters comparable to the number of data points. In this situation it is possible to find overfitting even with cross-validation measures in place. As an example, we uploaded a report where the overfitting is quite obvious: https://vp.nnpdf.science/EIA8y1JXQVmT9RYBLlUeJw==/ + + +For truly reproducible runs you should set `debug: true` in the runcard since, by default, we allow tensorflow the freedom to play with its own internal variables. Debug mode turns off hyperthreading and fixes tensorflow's internal variables. + +The folder structure is not final, especially the files found in the top level folder. There is still some refactoring to do here, so the explanation of the folders below might be outdated in a few weeks. + +The code works as a validphys app but it is not integrated in validphys. It is not installed when you run `conda install nnpdf` or `make install`. At the moment this could be taken as a "Release Candidate" but not as a final product. + +Since this is a beta version, the final touches of development are being done in the n3pdf repository, where there are a few pull requests open. Once we have merged all pull requests there we will move development to this one. + + +### Requirements: + +- **NNPDF:** Make sure NNPDF is installed and that python is able to import validphys. +- **Python packages:** usually having NNPDF installed and working means you will have everything you need; a possible pitfall is ruamel.yaml. +- **Keras & tensorflow:** if you installed nnpdf with conda I would suggest installing them with conda as well, to ensure compatibility between python versions (3.6/3.7 brings some changes that can affect tensorflow). Note: tensorflow-opt includes some optimizations for running on CPU, so it is the recommended version. +- **Hyperopt:** in order to use hyperopt you need to install the hyperopt python package, pandas and seaborn. +- **Operating system:** we have run this code on many different Linux machines (with wildly different Linux flavours) in both python 3.6 and 3.7. We haven't tested Mac OS, but there is no reason it should not work.
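Before going through the folder structure, here is a rough, hypothetical sketch of how the main building blocks described below fit together (simplified from `model_gen.py` and `msr.py`; the actual assembly is driven by `ModelTrainer.py`, and `flav_info` stands in for the `basis` dictionary of the runcard):

```python
from n3fit.model_gen import pdfNN_layer_generator
from n3fit.msr import msr_impose

# Placeholder: the "basis" dictionary of the runcard, read elsewhere
flav_info = ...

# NN mapping x -> fitted flavours, including the preprocessing factors
# and the rotation to the 14-flavour evolution basis
layer_pdf, layers = pdfNN_layer_generator(
    nodes=[15, 8],
    activations=["tanh", "linear"],
    flav_info=flav_info,
    seed=42,
)

# Impose the momentum sum rule: the fit basis is integrated and the
# resulting normalisation is applied to the 14-flavour PDF
pdf_with_msr, integrator_input = msr_impose(layers["fitbasis"], layer_pdf)

# Each experiment dictionary produced by the reader can then be turned into
# observables and losses, e.g.
#   obs = observable_generator(exp_dict)
#   prediction_tr = obs["output_tr"](pdf_with_msr)
#   loss_tr = obs["loss_tr"]
```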
+ +## Folder Structure: + +### Backend + +All backend dependent functions and classes are in the `backends` folder. At the moment there is only one backend implemented: keras (`backend/keras_backend`). + +In order to implement a new backend it should be enough to ensure it provides the same classes and methods. + +### Layers + +The custom layers that define the different pieces of the Neural Network are all in the `layers/` folder. Each layer is a different class extending `backends/?/MetaLayer.py`. + +Note for instance the DIS layer `layers/DIS.py`. This layer is an operation implementing the convolution of one PDF with one FKTable (according to the active flavours). +The plan is to replace some of the operations in these layers by custom operations, as we believe this will have an impact on the memory usage of the code. + + +### Base folder + +The base folder is (temporarily) where the rest of the code lies, but let me break it down. + +#### Main code: + +- n3fit.py + +Validphys app definition. + +- fit.py + +Driver of the fitting code, the hyperoptimization and the output of the code. + +- constrains.py + +Implements restrictions such as the momentum sum rule. + +- model_gen.py + +Implements the functions dedicated to generating the different independent pieces of the Neural Network, such as the Observable layers or the PDF layers. + +- reader.py + +Implements the reading of the datasets, experiments and positivity sets into python objects. This is the only place where libnnpdf is used. The hope is that most of the logic in this file will be substituted by PR #404. + +- writer.py + +Implements the functions dedicated to the output of the fit (crucially, the .exportgrid files). + +- statistics.py + +Class that saves information about the fit, such as the number of epochs and the losses of the different pieces. It also manages the stopping criteria. + +- ModelTrainer.py + +This class defines the behaviour of the fit, creates the Neural Network and performs the fit. + + +#### Hyperopt: + +These are files related to the hyperoptimization mode of the code. Below, in the basic usage section, there are some notes on how to use it in case someone wants to give it a try. + +- HyperAlgorith.py + +- HyperScanner.py + +- filetrials.py + +- plotting.py + +- create_testset.py + + +#### Runcards and report examples + +We provide three runcards (in the `runcards` folder): + +- Basic: Basic_runcard.yml, for quick testing with just two datasets +- DIS: PN3_DIS_example.yml (report example: https://vp.nnpdf.science/fpzXjidYR7SMrFBXpSacEA==) +- Global: PN3_Global_example.yml (report example: https://vp.nnpdf.science/BT98HzUSTxqg6_sDkkUx0A==) + + +#### Evolution code + +Instead of running apfel replica by replica, the code found in the folder `evolven3fit` will run the evolution for all replicas at once. +This code doesn't do any kind of sanity check: as long as it finds a .gridinfo file, it will be happy. + +Usage: + + make + ./evolven3fit run_folder number_of_replicas + + +### Basic Usage: + +The most basic usage is running a fit for a given runcard and a given replica: + + python3 n3fit.py runcard.yml replica_number + +This will generate a folder structure akin to the one nnfit currently generates. It is not necessary to run vp-setupfit (if you do, it will not hurt; you will need it anyway to generate a report at some point in the pipeline).
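Schematically, the generated structure looks something like this (a sketch only; the exact set of files inside each replica folder may differ):

```
<runcard_name>/            # or the name given via -o/--output
├── filter.yml             # copy of the input runcard
└── nnfit/
    ├── replica_1/         # per-replica output: exportgrid, arclengths, chi2, ...
    ├── replica_2/
    └── ...
```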
+ +Example: + + python3 n3fit.py runcards/NNPDF31_nlo_as_0118_basic.yml 1 + +will generate a folder `NNPDF31_nlo_as_0118_basic/nnfit/replica_1` containing a number of files with arclengths, chi2, etc. + + +#### Running multiple replicas sequentially: +With the option -r it is possible to run several replicas without the need to load the data each time. This is just a convenience feature: + + python3 n3fit.py runcards/NNPDF31_nlo_as_0118_basic.yml 1 -r 4 + +will run replicas 1 to 4 one after the other. + +#### Hyperoptimization: +The option --hyperopt will activate the hyperoptimizer: + + python3 n3fit.py runcards/NNPDF31_nlo_as_0118_basic.yml 1 --hyperopt 100 + +will run 100 iterations of the hyperoptimizer. We also include a script that will plot the results as a `scan.pdf` file: + + python3 plotting.py NNPDF31_nlo_as_0118_basic + +As mentioned before, it is important to avoid overfitting; relying blindly on hyperopt, however, can result in models prone to overfitting. For hyperopt a model that overfits looks very good, so the most straightforward way of solving this issue is to create a set of datasets that enter neither the fitting nor the validation. We include a script to do this in a systematic way with the following option: + + python3 n3fit.py runcards/NNPDF31_nlo_as_0118_basic.yml 1 --create_test_card + +This will generate a TEST runcard with a TEST set included, which can be used for hyperoptimization. + + +#### Example: from a fit to a validphys report + + +In order to generate a fit it is necessary to run the code for a number of replicas. Let's say we want a fit of the runcard DIS.yml with 20 replicas: + +``` +for replica in {1..20} +do + ./n3fit.py DIS.yml ${replica} +done +``` + +After this we'll have a `DIS/nnfit` folder with 20 replicas. Now, in order to generate the fit, we use the following workflow. + +First some legacy code; it is only needed so that the other scripts do not complain: + + vp-setupfit DIS.yml + +Now we can run evolven3fit with the folder where the replicas are and the maximum number of replicas to find: + + ./evolven3fit DIS 20 + +From this point onwards the steps are no different from before; for completeness they are also included here. + +After running the evolution code we run postfit to generate the final PDF; for that we give the number of members we want and the fit folder: + + postfit 15 DIS + +After postfit has finished you will want to copy the results to the NNPDF share folder, assuming a conda installation: + + cp -RL DIS $CONDA_PREFIX/share/NNPDF/results/ + +Finally, to generate the plots you need to use vp-comparefits and follow the instructions; for the DIS runcard we provide, you can use 181101-001-sc as the reference fit: + + vp-comparefits DIS 181101-001-sc --title comparison --author $USER --keywords n3fit -o my_DIS_report + + +### Going forward: + +This is a list of ideas we have, so people can start working on things if they feel like it. Some of them are necessary for the full integration with NNPDF (in particular testing and implementing the new fkparser), others can be interesting or fun side projects. + +- Testing the fkparser. + +Modifying reader to use the new fkparser implemented in PR #404 instead of using the C functions. +All calls to libnnpdf are concentrated in this file, so the changes should be easy to implement. + +- Closure tests + +We have not run any closure test or implemented a way of doing so in a systematic way. It could be an interesting exercise.
+ + +- Implement new backends + +As said before, it should be straightforward (a straight path can be very long, complicated and full of holes, but at least it does not have many curves). + +- Run with other algorithms + +At the moment we have run with some of the algorithms included in the default keras installation, but the space of machine learning algorithms is quite big. We have also limited ourselves to the gradient descent family, but that's not the end of the story. + + +- Study different strategies for preprocessing: + +At the moment we are taking a very naive approach to the fitting of the preprocessing: we let them vary freely within the strict limits set by the runcard. This may well be suboptimal. + +#### Some other things + +Some things people are working on that are close to completion: + +- Implementation of custom operators and gradients to improve performance. +- A visualizer for the fitting of the PDF in the form of an animation. + + +-------- + +Let us know if anything is unclear and feel free to open any issues you find! + + + Juan & Stefano