diff --git a/CMakeLists.txt b/CMakeLists.txt index 08ea44b0a..9c332177e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.25) # Needed for CUDA, MPI, and CTest features project( EXP - VERSION "7.8.6" + VERSION "7.9.1" HOMEPAGE_URL https://github.com/EXP-code/EXP LANGUAGES C CXX Fortran) @@ -268,7 +268,13 @@ endif() # try to find pybind11 and build wrapper python module find_package(Python3 COMPONENTS Interpreter Development) message(STATUS "python3 include dirs: ${Python3_INCLUDE_DIRS}") - +if(Python3_FOUND) + set(HAVE_PYTHON3 TRUE) +else() + if(ENABLE_PYEXP) + message(FATAL_ERROR "You asked for pyEXP but I cannot find a Python3 environment. Please make Python3 available or disable pyEXP. CMake will exit." ) + endif() +endif() # Force installation of the yaml-cpp libraries install(TARGETS yaml-cpp DESTINATION lib) diff --git a/config_cmake.h_in b/config_cmake.h_in index d1cf00db3..6f45119a0 100644 --- a/config_cmake.h_in +++ b/config_cmake.h_in @@ -19,6 +19,9 @@ /* Defined if you have HDF5 support */ #cmakedefine HAVE_HDF5 @HAVE_HDF5@ +/* Defined if Python3 runtime exists */ +#cmakedefine HAVE_PYTHON3 1 + /* Define to 1 if you have the `cuda' library. */ #cmakedefine HAVE_LIBCUDA 1 diff --git a/doc/exp.cfg b/doc/exp.cfg index 1405e49c3..0d3fc92e0 100644 --- a/doc/exp.cfg +++ b/doc/exp.cfg @@ -48,7 +48,7 @@ PROJECT_NAME = EXP # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 7.8.0 +PROJECT_NUMBER = 7.9.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/expui/BasisFactory.H b/expui/BasisFactory.H index 19b2fe6e6..1598f3f7d 100644 --- a/expui/BasisFactory.H +++ b/expui/BasisFactory.H @@ -137,6 +137,7 @@ namespace BasisClasses Eigen::Vector3d currentAccel(double time); public: + //! 
The current pseudo acceleration Eigen::Vector3d pseudo {0, 0, 0}; @@ -249,8 +250,8 @@ namespace BasisClasses //@{ //! Initialize non-inertial forces - void setNonInertial(int N, Eigen::VectorXd& x, Eigen::MatrixXd& pos); - void setNonInertial(int N, const std::string& orient); + void setNonInertial(int Naccel, const Eigen::VectorXd& x, const Eigen::MatrixXd& pos); + void setNonInertial(int Naccel, const std::string& orient); //@} //! Set the current pseudo acceleration @@ -259,6 +260,16 @@ namespace BasisClasses if (Naccel > 0) pseudo = currentAccel(time); } + //! Returns true if non-inertial forces are active + bool usingNonInertial() { return Naccel > 0; } + + //! Reset to inertial coordinates + void setInertial() + { + Naccel = 0; + pseudo = {0, 0, 0}; + } + //! Get the field label vector std::vector getFieldLabels(void) { return getFieldLabels(coordinates); } diff --git a/expui/BasisFactory.cc b/expui/BasisFactory.cc index 483dcca46..b410c7861 100644 --- a/expui/BasisFactory.cc +++ b/expui/BasisFactory.cc @@ -283,8 +283,16 @@ namespace BasisClasses return makeFromArray(time); } - void Basis::setNonInertial(int N, Eigen::VectorXd& t, Eigen::MatrixXd& pos) + void Basis::setNonInertial(int N, const Eigen::VectorXd& t, const Eigen::MatrixXd& pos) { + // Sanity checks + if (t.size() < 1) + throw std::runtime_error("Basis: setNonInertial: no times in time array"); + + if (t.size() != pos.rows()) + throw std::runtime_error("Basis::setNonInertial: size mismatch in time and position arrays"); + + // Set the data Naccel = N; t_accel = t; p_accel = pos; @@ -351,16 +359,31 @@ namespace BasisClasses { Eigen::Vector3d ret; - if (time < t_accel(0) || time > t_accel(t_accel.size()-1)) { - std::cout << "ERROR: " << time << " is outside of range of the non-inertial DB" - << std::endl; - ret.setZero(); - return ret; + auto n = t_accel.size(); + + // Allow a little bit of buffer in the allowable on-grid range but + // otherwise force termination + // + if ( time < t_accel(0 ) - 
0.5*(t_accel(1 ) - t_accel(0 )) || + time > t_accel(n-1) + 0.5*(t_accel(n-1) - t_accel(n-2)) ) { + + std::ostringstream msg; + msg << "Basis::currentAccel: " << time + << " is outside the range of the non-inertial DB [" + << t_accel(0) << ", " << t_accel(n-1) << "]"; + + throw std::runtime_error(msg.str()); } + // Do the quadratic interpolation + // else { int imin = 0; - int imax = std::lower_bound(t_accel.data(), t_accel.data()+t_accel.size(), time) - t_accel.data(); - if (imax > Naccel) imin = imax - Naccel; + int imax = std::lower_bound(t_accel.data(), t_accel.data()+n, time) - t_accel.data(); + + // Get a range around the current time of approx size Naccel + // + imax = std::min(n-1, imax + Naccel/2); + imin = std::max(imax - Naccel, 0); int num = imax - imin + 1; Eigen::VectorXd t(num); diff --git a/expui/BiorthBasis.H b/expui/BiorthBasis.H index 045e209e7..b52af8c08 100644 --- a/expui/BiorthBasis.H +++ b/expui/BiorthBasis.H @@ -163,6 +163,10 @@ namespace BasisClasses //! Clear the particle selector callback void clrSelector() { ftor = nullptr; } + + //! Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + virtual std::vector getAccel(double x, double y, double z) = 0; }; /** @@ -303,10 +307,14 @@ namespace BasisClasses { auto [ret, worst, lworst] = orthoCompute(orthoCheck(knots)); // For the CTest log - std::cout << "---- Spherical::orthoTest: worst=" << worst << std::endl; + if (myid==0) + std::cout << "---- Spherical::orthoTest: worst=" << worst << std::endl; return ret; } + //! Evaluate acceleration in Cartesian coordinates in centered + //! 
coordinate system + virtual std::vector getAccel(double x, double y, double z); }; /** @@ -538,10 +546,15 @@ namespace BasisClasses { auto [ret, worst, lworst] = orthoCompute(orthoCheck()); // For the CTest log - std::cout << "---- FlatDisk::orthoTest: worst=" << worst << std::endl; + if (myid==0) + std::cout << "---- FlatDisk::orthoTest: worst=" << worst << std::endl; return ret; } + //! Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + std::vector getAccel(double x, double y, double z); + }; /** @@ -689,10 +702,15 @@ namespace BasisClasses { auto [ret, worst, lworst] = orthoCompute(orthoCheck()); // For the CTest log - std::cout << "CBDisk::orthoTest: worst=" << worst << std::endl; + if (myid==0) + std::cout << "CBDisk::orthoTest: worst=" << worst << std::endl; return ret; } + //! Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + std::vector getAccel(double x, double y, double z); + }; /** @@ -718,7 +736,7 @@ namespace BasisClasses int rnum, pnum, tnum; double rmin, rmax, rcylmin, rcylmax; double acyl, hcyl; - bool expcond, logarithmic, density, EVEN_M; + bool expcond, logarithmic, density, EVEN_M, sech2 = false; std::vector potd, dpot, dpt2, dend; std::vector legs, dlegs, d2legs; @@ -842,9 +860,15 @@ namespace BasisClasses { auto [ret, worst, lworst] = orthoCompute(sl->orthoCheck()); // For the CTest log - std::cout << "---- Cylindrical::orthoTest: worst=" << worst << std::endl; + if (myid==0) + std::cout << "---- Cylindrical::orthoTest: worst=" << worst << std::endl; return ret; } + + //! Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + std::vector getAccel(double x, double y, double z); + }; /** @@ -985,12 +1009,18 @@ namespace BasisClasses } } } - + + if (myid==0) std::cout << "---- Slab::orthoTest: worst=" << worst << std::endl; + if (worst > __EXP__::orthoTol) return false; return true; } + //! 
Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + std::vector getAccel(double x, double y, double z); + }; /** @@ -1108,11 +1138,17 @@ namespace BasisClasses } } - std::cout << "---- Cube::orthoTest: worst=" << worst << std::endl; + if (myid==0) + std::cout << "---- Cube::orthoTest: worst=" << worst << std::endl; + if (worst > __EXP__::orthoTol) return false; return true; } + //! Evaluate acceleration in Cartesian coordinates in centered + //! coordinate system + std::vector getAccel(double x, double y, double z); + }; diff --git a/expui/BiorthBasis.cc b/expui/BiorthBasis.cc index 93f73a336..32b70dbda 100644 --- a/expui/BiorthBasis.cc +++ b/expui/BiorthBasis.cc @@ -294,7 +294,7 @@ namespace BasisClasses 0, 1, cachename); // Test basis for consistency - orthoTest(200); + if (myid==0) orthoTest(200); } void Bessel::initialize() @@ -320,7 +320,7 @@ namespace BasisClasses bess = std::make_shared(rmax, lmax, nmax, rnum); // Test basis for consistency - orthoTest(200); + if (myid==0) orthoTest(200); } void Spherical::reset_coefs(void) @@ -396,6 +396,10 @@ namespace BasisClasses CoefClasses::SphStruct* cf = dynamic_cast(coef.get()); + // Cache the current coefficient structure + // + coefret = coef; + // Assign internal coefficient table (doubles) from the complex struct // for (int l=0, L0=0, L1=0; l<=lmax; l++) { @@ -507,10 +511,7 @@ namespace BasisClasses // Get thread id int tid = omp_get_thread_num(); - double fac1, cosm, sinm; - double sinth = -sqrt(fabs(1.0 - costh*costh)); - - fac1 = factorial(0, 0); + double fac1 = factorial(0, 0); get_dens (dend[tid], r/scale); get_pot (potd[tid], r/scale); @@ -567,8 +568,8 @@ namespace BasisClasses moffset++; } else { - cosm = cos(phi*m); - sinm = sin(phi*m); + double cosm = cos(phi*m); + double sinm = sin(phi*m); double sumR0=0.0, sumP0=0.0, sumD0=0.0; double sumR1=0.0, sumP1=0.0, sumD1=0.0; @@ -610,6 +611,112 @@ namespace BasisClasses // Return force not potential gradient } + std::vector + 
Spherical::getAccel(double x, double y, double z) + { + // Get polar coordinates + double R = sqrt(x*x + y*y); + double r = sqrt(R*R + z*z); + double costh = z/r; + double sinth = R/r; + double phi = atan2(y, x); + + // Get thread id + int tid = omp_get_thread_num(); + + + double fac1 = factorial(0, 0); + + get_pot (potd[tid], r/scale); + get_force(dpot[tid], r/scale); + + legendre_R(lmax, costh, legs[tid], dlegs[tid]); + + double pot0, potr; + + if (NO_L0) { + pot0 = 0.0; + potr = 0.0; + } else { + pot0 = fac1 * expcoef.row(0).dot(potd[tid].row(0)); + potr = fac1 * expcoef.row(0).dot(dpot[tid].row(0)); + } + + double den1 = 0.0; + double pot1 = 0.0; + double pott = 0.0; + double potp = 0.0; + + // L loop + for (int l=1, loffset=1; l<=lmax; loffset+=(2*l+1), l++) { + + // Check for even l + if (EVEN_L and l%2) continue; + + // No l=1 + if (NO_L1 and l==1) continue; + + // M loop + for (int m=0, moffset=0; m<=l; m++) { + + if (M0_only and m) continue; + if (EVEN_M and m%2) continue; + + fac1 = factorial(l, m); + if (m==0) { + double sumR=0.0, sumP=0.0, sumD=0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + sumR += expcoef(loffset+moffset, n) * dend[tid](l, n); + sumP += expcoef(loffset+moffset, n) * potd[tid](l, n); + sumD += expcoef(loffset+moffset, n) * dpot[tid](l, n); + } + + pot1 += fac1*legs[tid] (l, m) * sumP; + potr += fac1*legs[tid] (l, m) * sumD; + pott += fac1*dlegs[tid](l, m) * sumP; + + moffset++; + } + else { + double cosm = cos(phi*m); + double sinm = sin(phi*m); + + double sumR0=0.0, sumP0=0.0, sumD0=0.0; + double sumR1=0.0, sumP1=0.0, sumD1=0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + sumP0 += expcoef(loffset+moffset+0, n) * potd[tid](l, n); + sumD0 += expcoef(loffset+moffset+0, n) * dpot[tid](l, n); + sumP1 += expcoef(loffset+moffset+1, n) * potd[tid](l, n); + sumD1 += expcoef(loffset+moffset+1, n) * dpot[tid](l, n); + } + + pot1 += fac1 * legs[tid] (l, m) * ( sumP0*cosm + sumP1*sinm ); + potr += fac1 * 
legs[tid] (l, m) * ( sumD0*cosm + sumD1*sinm ); + pott += fac1 * dlegs[tid](l, m) * ( sumP0*cosm + sumP1*sinm ); + potp += fac1 * legs[tid] (l, m) * (-sumP0*sinm + sumP1*cosm ) * m; + + moffset +=2; + } + } + } + + double potlfac = 1.0/scale; + + potr *= (-potlfac)/scale; + pott *= (-potlfac); + potp *= (-potlfac); + + double potR = potr*sinth + pott*costh; + double potz = potr*costh - pott*sinth; + + double tpotx = potR*x/R - potp*y/R ; + double tpoty = potR*y/R + potp*x/R ; + + // Return force not potential gradient + // + return {tpotx, tpoty, potz}; + } + std::vector Spherical::cyl_eval(double R, double z, double phi) @@ -631,7 +738,7 @@ namespace BasisClasses Spherical::crt_eval (double x, double y, double z) { - double R = sqrt(x*x + y*y); + double R = sqrt(x*x + y*y) + 1.0e-18; double phi = atan2(y, x); auto v = cyl_eval(R, z, phi); @@ -642,7 +749,6 @@ namespace BasisClasses return {v[0], v[1], v[2], v[3], v[4], v[5], tpotx, tpoty, v[7]}; } - Spherical::BasisArray SphericalSL::getBasis (double logxmin, double logxmax, int numgrid) { @@ -874,6 +980,7 @@ namespace BasisClasses "rcylmax", "acyl", "hcyl", + "sech2", "snr", "evcut", "nmaxfid", @@ -980,6 +1087,8 @@ namespace BasisClasses double w1 = 1.0/(1.0+dweight); double w2 = dweight/(1.0+dweight); + if (sech2) { h1 *= 0.5; h2 *= 0.5; } + double f1 = cosh(z/h1); double f2 = cosh(z/h2); @@ -991,13 +1100,14 @@ namespace BasisClasses case DiskType::diskbulge: { - double f = cosh(z/hcyl); + double h = sech2 ? 0.5*hcyl : hcyl; + double f = cosh(z/h); double rr = pow(pow(R, 2) + pow(z,2), 0.5); double w1 = Mfac; double w2 = 1.0 - Mfac; double as = HERNA; - ans = w1*exp(-R/acyl)/(4.0*M_PI*acyl*acyl*hcyl*f*f) + + ans = w1*exp(-R/acyl)/(4.0*M_PI*acyl*acyl*h*f*f) + w2*pow(as, 4)/(2.0*M_PI*rr)*pow(rr+as,-3.0) ; } break; @@ -1007,8 +1117,9 @@ namespace BasisClasses case DiskType::exponential: default: { - double f = cosh(z/hcyl); - ans = exp(-R/acyl)/(4.0*M_PI*acyl*acyl*hcyl*f*f); + double h = sech2 ? 
0.5*hcyl : hcyl; + double f = cosh(z/h); + ans = exp(-R/acyl)/(4.0*M_PI*acyl*acyl*h*f*f); } break; } @@ -1130,6 +1241,7 @@ namespace BasisClasses if (conf["acyl" ]) acyl = conf["acyl" ].as(); if (conf["hcyl" ]) hcyl = conf["hcyl" ].as(); + if (conf["sech2" ]) sech2 = conf["sech2" ].as(); if (conf["lmaxfid" ]) lmaxfid = conf["lmaxfid" ].as(); if (conf["nmaxfid" ]) nmaxfid = conf["nmaxfid" ].as(); if (conf["nmax" ]) nmax = conf["nmax" ].as(); @@ -1300,8 +1412,20 @@ namespace BasisClasses try { // Check for map entry, will through if the DTYPE = dtlookup.at(dtype); // key is not in the map. - if (myid==0) // Report DiskType + if (myid==0) { // Report DiskType std::cout << "---- DiskType is <" << dtype << ">" << std::endl; + + if (not sech2) { + switch (DTYPE) { + case DiskType::doubleexpon: + case DiskType::exponential: + case DiskType::diskbulge: + std::cout << "---- pyEXP uses sech^2(z/h) rather than the more common sech^2(z/(2h))" << std::endl + << "---- Use the 'sech2: true' in your YAML config to use sech^2(z/(2h))" << std::endl + << "---- pyEXP will assume sech^2(z/(2h)) by default in v 7.9.0 and later" << std::endl; + } + } + } } catch (const std::out_of_range& err) { if (myid==0) { @@ -1325,7 +1449,7 @@ namespace BasisClasses // The scale in EmpCylSL is assumed to be 1 so we compute the // height relative to the length // - double H = hcyl/acyl; + double H = sech2 ? 0.5*hcyl/acyl : hcyl/acyl; // The model instance (you can add others in DiskModels.H). // It's MN or Exponential if not MN. 
@@ -1362,7 +1486,7 @@ namespace BasisClasses // Orthogonality sanity check // - orthoTest(); + if (myid==0) orthoTest(); // Set cylindrical coordindates // @@ -1411,6 +1535,24 @@ namespace BasisClasses tpotl0, tpotl - tpotl0, tpotl, tpotx, tpoty, tpotz}; } + // Evaluate in cartesian coordinates + std::vector Cylindrical::getAccel(double x, double y, double z) + { + double R = sqrt(x*x + y*y); + double phi = atan2(y, x); + + double tdens0, tdens, tpotl0, tpotl, tpotR, tpotz, tpotp; + + sl->accumulated_eval(R, z, phi, tpotl0, tpotl, tpotR, tpotz, tpotp); + + tdens = sl->accumulated_dens_eval(R, z, phi, tdens0); + + double tpotx = tpotR*x/R - tpotp*y/R ; + double tpoty = tpotR*y/R + tpotp*x/R ; + + return {tpotx, tpoty, tpotz}; + } + // Evaluate in cylindrical coordinates std::vector Cylindrical::cyl_eval(double R, double z, double phi) { @@ -1482,6 +1624,10 @@ namespace BasisClasses CoefClasses::CylStruct* cf = dynamic_cast(coef.get()); + // Cache the current coefficient structure + // + coefret = coef; + for (int m=0; m<=mmax; m++) { // Set to zero on m=0 call only--------+ sl->set_coefs(m, (*cf->coefs).row(m).real(), (*cf->coefs).row(m).imag(), m==0); } @@ -1549,7 +1695,6 @@ namespace BasisClasses "nmaxfid", "rcylmin", "rcylmax", - "acyltbl", "numx", "numy", "numr", @@ -1667,10 +1812,8 @@ namespace BasisClasses // Set characteristic radius defaults // - if (not conf["acyltbl"]) conf["acyltbl"] = 0.6; if (not conf["scale"]) conf["scale"] = 1.0; - // Check for non-null cache file name. This must be specified // to prevent recomputation and unexpected behavior. 
// @@ -1689,7 +1832,7 @@ namespace BasisClasses // Orthogonality sanity check // - orthoTest(); + if (myid==0) orthoTest(); // Get max threads // @@ -1735,9 +1878,12 @@ namespace BasisClasses cf->nmax = nmax; cf->time = time; - cf->store((2*mmax+1)*nmax); + // Allocate the coefficient storage + cf->store.resize((mmax+1)*nmax); + + // Make the coefficient map cf->coefs = std::make_shared - (cf->store.data(), 2*mmax+1, nmax); + (cf->store.data(), mmax+1, nmax); for (int m=0, m0=0; m<=mmax; m++) { for (int n=0; n(coef.get()); auto & cc = *cf->coefs; + // Cache the current coefficient structure + // + coefret = coef; + // Assign internal coefficient table (doubles) from the complex struct // for (int m=0, m0=0; m<=mmax; m++) { @@ -1806,15 +1956,15 @@ namespace BasisClasses { // Normalization factors // - constexpr double norm0 = 2.0*M_PI * 0.5*M_2_SQRTPI/M_SQRT2; - constexpr double norm1 = 2.0*M_PI * 0.5*M_2_SQRTPI; + constexpr double norm0 = 1.0; + constexpr double norm1 = M_SQRT2; //====================== // Compute coefficients //====================== double R2 = x*x + y*y; - double R = sqrt(R); + double R = sqrt(R2); // Get thread id int tid = omp_get_thread_num(); @@ -1874,8 +2024,8 @@ namespace BasisClasses int tid = omp_get_thread_num(); // Fixed values - constexpr double norm0 = 0.5*M_2_SQRTPI/M_SQRT2; - constexpr double norm1 = 0.5*M_2_SQRTPI; + constexpr double norm0 = 1.0; + constexpr double norm1 = M_SQRT2; double den0=0, den1=0, pot0=0, pot1=0, rpot=0, zpot=0, ppot=0; @@ -1967,6 +2117,97 @@ namespace BasisClasses return {den0, den1, den0+den1, pot0, pot1, pot0+pot1, rpot, zpot, ppot}; } + std::vector FlatDisk::getAccel(double x, double y, double z) + { + // Get thread id + int tid = omp_get_thread_num(); + + // Fixed values + constexpr double norm0 = 0.5*M_2_SQRTPI/M_SQRT2; + constexpr double norm1 = 0.5*M_2_SQRTPI; + + // Compute polar coordinates + double R = std::sqrt(x*x + y*y); + double phi = std::atan2(y, x); + + + double rpot=0, zpot=0, ppot=0; + + 
// Off grid evaluation + if (R>ortho->getRtable() or fabs(z)>ortho->getRtable()) { + double r2 = R*R + z*z; + double r = sqrt(r2); + + rpot = -totalMass*R/(r*r2 + 10.0*std::numeric_limits::min()); + zpot = -totalMass*z/(r*r2 + 10.0*std::numeric_limits::min()); + + return {rpot, zpot, ppot}; + } + + // Get the basis fields + // + ortho->get_pot (potd[tid], R, z); + ortho->get_rforce (potR[tid], R, z); + ortho->get_zforce (potZ[tid], R, z); + + // m loop + // + for (int m=0, moffset=0; m<=mmax; m++) { + + if (m==0 and NO_M0) { moffset++; continue; } + if (m==1 and NO_M1) { moffset += 2; continue; } + if (EVEN_M and m/2*2 != m) { moffset += 2; continue; } + if (m>0 and M0_only) break; + + if (m==0) { + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + rpot += expcoef(0, n) * potR[tid](0, n) * norm0; + zpot += expcoef(0, n) * potZ[tid](0, n) * norm0; + } + + moffset++; + } else { + double cosm = cos(phi*m), sinm = sin(phi*m); + double vc, vs; + + vc = vs = 0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + vc += expcoef(moffset+0, n) * potd[tid](m, n); + vs += expcoef(moffset+1, n) * potd[tid](m, n); + } + + ppot += (-vc*sinm + vs*cosm) * m * norm1; + + vc = vs = 0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + vc += expcoef(moffset+0, n) * potR[tid](m, n); + vs += expcoef(moffset+1, n) * potR[tid](m, n); + } + + rpot += (vc*cosm + vs*sinm) * norm1; + + vc = vs = 0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + vc += expcoef(moffset+0, n) * potZ[tid](m, n); + vs += expcoef(moffset+1, n) * potZ[tid](m, n); + } + + zpot += (vc*cosm + vs*sinm) * norm1; + + moffset +=2; + } + } + + rpot *= -1.0; + zpot *= -1.0; + ppot *= -1.0; + + double potx = rpot*x/R - ppot*y/R; + double poty = rpot*y/R + ppot*x/R; + + return {potx, poty, zpot}; + } + std::vector FlatDisk::sph_eval(double r, double costh, double phi) { @@ -1994,8 +2235,8 @@ namespace BasisClasses auto v = cyl_eval(R, z, phi); - double potx = v[4]*x/R 
- v[8]*y/R; - double poty = v[4]*y/R + v[8]*x/R; + double potx = v[6]*x/R - v[8]*y/R; + double poty = v[6]*y/R + v[8]*x/R; return {v[0], v[1], v[2], v[3], v[4], v[5], potx, poty, v[7]}; } @@ -2139,7 +2380,7 @@ namespace BasisClasses // Orthogonality sanity check // - orthoTest(); + if (myid==0) orthoTest(); // Get max threads // @@ -2438,9 +2679,12 @@ namespace BasisClasses cf->nmax = nmax; cf->time = time; - cf->store((2*mmax+1)*nmax); + // Allocate the coefficient storage + cf->store.resize((mmax+1)*nmax); + + // Make the coefficient map cf->coefs = std::make_shared - (cf->store.data(), 2*mmax+1, nmax); + (cf->store.data(), mmax+1, nmax); for (int m=0, m0=0; m<=mmax; m++) { for (int n=0; n(coef.get()); auto & cc = *cf->coefs; + // Cache the current coefficient structure + coefret = coef; + // Assign internal coefficient table (doubles) from the complex struct // for (int m=0, m0=0; m<=mmax; m++) { @@ -2517,7 +2764,7 @@ namespace BasisClasses //====================== double R2 = x*x + y*y; - double R = sqrt(R); + double R = sqrt(R2); // Get thread id int tid = omp_get_thread_num(); @@ -2567,7 +2814,7 @@ namespace BasisClasses } } - std::vectorCBDisk::cyl_eval(double R, double z, double phi) + std::vector CBDisk::cyl_eval(double R, double z, double phi) { // Get thread id int tid = omp_get_thread_num(); @@ -2645,6 +2892,75 @@ namespace BasisClasses } + std::vector CBDisk::getAccel(double x, double y, double z) + { + // Get thread id + int tid = omp_get_thread_num(); + + // Fixed values + constexpr double norm0 = 1.0; + constexpr double norm1 = M_SQRT2; + + double R = std::sqrt(x*x + y*y); + double phi = std::atan2(y, x); + + double rpot=0, zpot=0, ppot=0; + + // Get the basis fields + // + get_pot (potd[tid], R); + get_force (potR[tid], R); + + // m loop + // + for (int m=0, moffset=0; m<=mmax; m++) { + + if (m==0 and NO_M0) { moffset++; continue; } + if (m==1 and NO_M1) { moffset += 2; continue; } + if (EVEN_M and m/2*2 != m) { moffset += 2; continue; } + if (m>0 
and M0_only) break; + + if (m==0) { + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + rpot += expcoef(0, n) * potR[tid](0, n) * norm0; + } + + moffset++; + } else { + double cosm = cos(phi*m), sinm = sin(phi*m); + double vc, vs; + + vc = vs = 0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + vc += expcoef(moffset+0, n) * potd[tid](m, n); + vs += expcoef(moffset+1, n) * potd[tid](m, n); + } + + ppot += (-vc*sinm + vs*cosm) * m * norm1; + + vc = vs = 0.0; + for (int n=std::max(0, N1); n<=std::min(nmax-1, N2); n++) { + vc += expcoef(moffset+0, n) * potR[tid](m, n); + vs += expcoef(moffset+1, n) * potR[tid](m, n); + } + + rpot += (vc*cosm + vs*sinm) * norm1; + + moffset +=2; + } + } + + rpot *= -1.0; + ppot *= -1.0; + + + double potx = rpot*x/R - ppot*y/R; + double poty = rpot*y/R + ppot*x/R; + + return {potx, poty, zpot}; + } + + std::vector CBDisk::sph_eval(double r, double costh, double phi) { // Cylindrical coords @@ -2671,8 +2987,8 @@ namespace BasisClasses auto v = cyl_eval(R, z, phi); - double potx = v[4]*x/R - v[8]*y/R; - double poty = v[4]*y/R + v[8]*x/R; + double potx = v[6]*x/R - v[8]*y/R; + double poty = v[6]*y/R + v[8]*x/R; return {v[0], v[1], v[2], v[3], v[4], v[5], potx, poty, v[7]}; } @@ -2810,7 +3126,7 @@ namespace BasisClasses // Orthogonality sanity check // - if (check) orthoTest(); + if (check and myid==0) orthoTest(); // Get max threads // @@ -2882,6 +3198,10 @@ namespace BasisClasses auto cf = dynamic_cast(coef.get()); expcoef = *cf->coefs; + // Cache the current coefficient structure + // + coefret = coef; + coefctr = {0.0, 0.0, 0.0}; } @@ -3044,6 +3364,83 @@ namespace BasisClasses } + std::vector + Slab::getAccel(double x, double y, double z) + { + // Loop indices + // + int ix, iy, iz; + + // Working values + // + std::complex facx, facy, fac, facf; + + // Return values + // + std::complex accx(0.0), accy(0.0), accz(0.0); + + // Recursion multipliers + // + std::complex stepx = exp(kfac*x); + std::complex stepy 
= exp(kfac*y); + + // Initial values (note sign change) + // + std::complex startx = exp(-static_cast(nmaxx)*kfac*x); + std::complex starty = exp(-static_cast(nmaxy)*kfac*y); + + Eigen::VectorXd vpot(nmaxz), vfrc(nmaxz); + + for (facx=startx, ix=0; ix nmaxx) { + std::cerr << "Out of bounds: ii=" << ii << std::endl; + } + if (iiy > nmaxy) { + std::cerr << "Out of bounds: jj=" << jj << std::endl; + } + + if (iix>=iiy) { + ortho->get_pot (vpot, z, iix, iiy); + ortho->get_force(vfrc, z, iix, iiy); + } + else { + ortho->get_pot (vpot, z, iiy, iix); + ortho->get_force(vfrc, z, iiy, iix); + } + + + for (int iz=0; iz(ii)*fac; + accy += -kfac*static_cast(jj)*fac; + accz += -facf; + + } + } + } + + return {accx.real(), accy.real(), accz.real()}; + } + std::vector Slab::crt_eval(double x, double y, double z) { @@ -3248,7 +3645,7 @@ namespace BasisClasses // Orthogonality sanity check // - if (check) orthoTest(); + if (check and myid==0) orthoTest(); // Get max threads // @@ -3314,6 +3711,10 @@ namespace BasisClasses auto cf = dynamic_cast(coef.get()); expcoef = *cf->coefs; + // Cache the cuurent coefficient structure + // + coefret = coef; + coefctr = {0.0, 0.0, 0.0}; } @@ -3402,6 +3803,20 @@ namespace BasisClasses return {0, den1, den1, 0, pot1, pot1, frcx, frcy, frcz}; } + std::vector Cube::getAccel(double x, double y, double z) + { + // Get thread id + int tid = omp_get_thread_num(); + + // Position vector + Eigen::Vector3d pos {x, y, z}; + + // Get the basis fields + auto frc = ortho->get_force(expcoef, pos); + + return {-frc(0).real(), -frc(1).real(), -frc(2).real()}; + } + std::vector Cube::cyl_eval(double R, double z, double phi) { // Get thread id @@ -3696,6 +4111,7 @@ namespace BasisClasses // Get expansion center // auto ctr = basis->getCenter(); + if (basis->usingNonInertial()) ctr = {0, 0, 0}; // Get fields // @@ -3704,10 +4120,34 @@ namespace BasisClasses auto v = basis->getFields(ps(n, 0) - ctr[0], ps(n, 1) - ctr[1], ps(n, 2) - ctr[2]); - // First 6 fields are 
density and potential, follewed by acceleration + // First 6 fields are density and potential, followed by acceleration for (int k=0; k<3; k++) accel(n, k) += v[6+k] - basis->pseudo(k); } + // true for deep debugging + // | + // v + if (false and basis->usingNonInertial()) { + + auto coefs = basis->getCoefficients(); + auto time = coefs->time; + auto ctr = coefs->ctr; + + std::ofstream tmp; + if (time <= 0.0) tmp.open("pseudo.dat"); + else tmp.open("pseudo.dat", ios::app); + + if (tmp) + tmp << std::setw(16) << std::setprecision(5) << time + << std::setw(16) << std::setprecision(5) << ctr[0] + << std::setw(16) << std::setprecision(5) << ctr[1] + << std::setw(16) << std::setprecision(5) << ctr[2] + << std::setw(16) << std::setprecision(5) << basis->pseudo(0) + << std::setw(16) << std::setprecision(5) << basis->pseudo(1) + << std::setw(16) << std::setprecision(5) << basis->pseudo(2) + << std::endl; + } + return accel; } @@ -3734,13 +4174,12 @@ namespace BasisClasses throw std::runtime_error(sout.str()); } - auto it1 = std::lower_bound(times.begin(), times.end(), t); - auto it2 = it1 + 1; + auto it2 = std::lower_bound(times.begin(), times.end(), t); + auto it1 = it2; - if (it2 == times.end()) { - it2--; - it1 = it2 - 1; - } + if (it2 == times.end()) throw std::runtime_error("Basis::AllTimeAccel::evalcoefs: time t=" + std::to_string(t) + " out of bounds"); + else if (it2 == times.begin()) it2++; + else it1--; double a = (*it2 - t)/(*it2 - *it1); double b = (t - *it1)/(*it2 - *it1); @@ -3798,13 +4237,13 @@ namespace BasisClasses throw std::runtime_error(sout.str()); } - auto it1 = std::lower_bound(times.begin(), times.end(), t); - auto it2 = it1 + 1; - - if (it2 == times.end()) { - it2--; - it1 = it2 - 1; - } + auto it2 = std::lower_bound(times.begin(), times.end(), t); + auto it1 = it2; + + if (it2 == times.end()) + throw std::runtime_error("Basis::SingleTimeAccel::evalcoefs: time t=" + std::to_string(t) + " out of bounds"); + else if (it2 == times.begin()) it2++; + 
else it1--; double a = (*it2 - t)/(*it2 - *it1); double b = (t - *it1)/(*it2 - *it1); @@ -3851,25 +4290,93 @@ namespace BasisClasses { int rows = ps.rows(); - // Drift 1/2 - for (int n=0; n kf(4); + for (int i=0; i<4; i++) kf[i].resize(rows, 6); + // Step 1 + // + accel.setZero(); + for (auto mod : bfe) F(t, ps, accel, mod); + for (int n=0; n(t+h, ps); } diff --git a/expui/Coefficients.cc b/expui/Coefficients.cc index 52d8f69e7..b01cb0c04 100644 --- a/expui/Coefficients.cc +++ b/expui/Coefficients.cc @@ -2573,6 +2573,18 @@ namespace CoefClasses void Coefs::WriteH5Coefs(const std::string& prefix) { + // Sanity check: throw runtime error if there are no coefficient + // sets + // + if (Times().size() == 0) { + throw std::runtime_error + ("Coefs::WriteH5Coefs: " + "we have NO coefficient sets...continuing without writing" + ); + } + + // Write coefficients + // try { // Create a new hdf5 file // diff --git a/expui/FieldGenerator.H b/expui/FieldGenerator.H index f342d5dc5..65b2624ae 100644 --- a/expui/FieldGenerator.H +++ b/expui/FieldGenerator.H @@ -119,7 +119,7 @@ namespace Field std::vector center={0.0, 0.0, 0.0}); //! 
Compute spherical log histogram from particles - std::tuple + std::tuple histo1dlog(PR::PRptr reader, double rmin, double rmax, int nbins, std::vector center={0.0, 0.0, 0.0}); diff --git a/expui/FieldGenerator.cc b/expui/FieldGenerator.cc index c79c180b9..9bf4ca921 100644 --- a/expui/FieldGenerator.cc +++ b/expui/FieldGenerator.cc @@ -919,7 +919,7 @@ namespace Field } // END histogram1d - std::tuple + std::tuple FieldGenerator::histo1dlog(PR::PRptr reader, double rmin, double rmax, int nbins, std::vector ctr) { @@ -930,6 +930,9 @@ namespace Field Eigen::VectorXf rad = Eigen::VectorXf::Zero(nbins); Eigen::VectorXf ret = Eigen::VectorXf::Zero(nbins); + Eigen::VectorXf vel = Eigen::VectorXf::Zero(nbins); + Eigen::MatrixXf vc2 = Eigen::MatrixXf::Zero(nbins, 3); + Eigen::MatrixXf vc1 = Eigen::MatrixXf::Zero(nbins, 3); double lrmin = log(rmin), lrmax = log(rmax); double del = (lrmax - lrmin)/nbins; @@ -944,18 +947,35 @@ namespace Field } int indx = floor((log(sqrt(rad)) - lrmin)/del); - if (indx>=0 and indxmass; + if (indx>=0 and indxmass; + for (int k=0; k<3; k++) { + double v = p->vel[k]; + vc1(indx, k) += p->mass * v; + vc2(indx, k) += p->mass * v*v; + } + } } // Accumulate between MPI nodes; return value to root node // if (use_mpi) { - if (myid==0) + if (myid==0) { MPI_Reduce(MPI_IN_PLACE, ret.data(), ret.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); - else + MPI_Reduce(MPI_IN_PLACE, vc1.data(), vc1.size(), + MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(MPI_IN_PLACE, vc2.data(), vc2.size(), + MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + } + else { MPI_Reduce(ret.data(), NULL, ret.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(vc1.data(), NULL, vc1.size(), + MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(vc2.data(), NULL, vc2.size(), + MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + } } // Compute density @@ -963,11 +983,28 @@ namespace Field double d3 = exp(3.0*del); double rf = 4.0*pi/3.0*(d3 - 1.0); for (int i=0; i 0) { + for (int k=0; k<3; 
k++) { + vc1(i, k) /= ret[i]; + vc2(i, k) /= ret[i]; + sig += vc2(i, k) - vc1(i, k)*vc1(i, k); + } + rad[i] = exp(lrmin + del*(0.5 + i)); + ret[i] /= exp(3.0*(lrmin + del*i)) * rf; + vel[i] = sqrt(fabs(sig)); + } else { + for (int k=0; k<3; k++) { + vc1(i, k) = 0.0; + vc2(i, k) = 0.0; + } + rad[i] = exp(lrmin + del*(0.5 + i)); + ret[i] = 0.0; + vel[i] = 0.0; + } } - return {rad, ret}; + return {rad, ret, vel}; } // END histo1dlog diff --git a/expui/expMSSA.H b/expui/expMSSA.H index 6e3563f4c..63cc27b35 100644 --- a/expui/expMSSA.H +++ b/expui/expMSSA.H @@ -84,8 +84,8 @@ namespace MSSA //! Number of components in the reconstruction int ncomp; - //! Normalization values - double totVar, totPow; + //! Normalization options + bool totVar, totPow; //! Toggle for detrending bool useMean; diff --git a/expui/expMSSA.cc b/expui/expMSSA.cc index 176ae7715..0b6b5caf4 100644 --- a/expui/expMSSA.cc +++ b/expui/expMSSA.cc @@ -1522,8 +1522,8 @@ namespace MSSA { HighFive::Group recon = file.createGroup("reconstruction"); recon.createAttribute ("ncomp", HighFive::DataSpace::From(ncomp) ).write(ncomp); - recon.createAttribute("totVar", HighFive::DataSpace::From(totVar)).write(totVar); - recon.createAttribute("totPow", HighFive::DataSpace::From(totVar)).write(totPow); + recon.createAttribute("totVar", HighFive::DataSpace::From(totVar)).write(totVar); + recon.createAttribute("totPow", HighFive::DataSpace::From(totPow)).write(totPow); for (int n=0; n(); + if (params["totPow"]) totPow = params["totPow"].as(); + + if (totPow==true) { + if (totVar==true) { + std::cerr << "expMSSA: both totVar and totPow are set to true." + << "Using totPow." 
<< std::endl; + totVar = false; + } type = TrendType::totPow; - else + } else if (totVar==true) { + type = TrendType::totVar; + } else { + // if nothing set go default type = TrendType::perChannel; - + } // Set the SVD strategy for mSSA // diff --git a/exputil/DiskDensityFunc.cc b/exputil/DiskDensityFunc.cc index 2c6f00bc5..1437abb17 100644 --- a/exputil/DiskDensityFunc.cc +++ b/exputil/DiskDensityFunc.cc @@ -1,5 +1,7 @@ #include +#ifdef HAVE_PYTHON3 + namespace py = pybind11; DiskDensityFunc::DiskDensityFunc(const std::string& modulename, @@ -29,3 +31,23 @@ double DiskDensityFunc::operator() (double R, double z, double phi) { return disk_density(R, z, phi).cast(); } + +#else + +DiskDensityFunc::DiskDensityFunc(const std::string& modulename, + const std::string& funcname) + : funcname(funcname) +{ + throw std::runtime_error("DiskDensityFunc: you environoment does not have Python3 support. Use a built-in density target or install Python3 and recompile"); +} + +DiskDensityFunc::~DiskDensityFunc() +{ +} + +double DiskDensityFunc::operator() (double R, double z, double phi) +{ + return 0.0; +} + +#endif diff --git a/exputil/EXPmath.cc b/exputil/EXPmath.cc index 74189d649..feed74357 100644 --- a/exputil/EXPmath.cc +++ b/exputil/EXPmath.cc @@ -61,12 +61,12 @@ namespace AltMath return ans; } -#define ACC 40.0 -#define BIGNO 1.0e10 -#define BIGNI 1.0e-10 - double bessj(int n,double x) { + const double ACC = 40.0; + const double BIGNO = 1.0e10; + const double BIGNI = 1.0e-10; + int j,jsum,m; double ax,bj,bjm,bjp,sum,tox,ans; @@ -114,10 +114,6 @@ namespace AltMath return x < 0.0 && n%2 == 1 ? 
-ans : ans; } -#undef ACC -#undef BIGNO -#undef BIGNI - double bessk0(double x) { double y,ans; @@ -174,12 +170,12 @@ namespace AltMath return ans; } -#define ACC 40.0 -#define BIGNO 1.0e10 -#define BIGNI 1.0e-10 - double bessi(int n,double x) { + const double ACC = 40.0; + const double BIGNO = 1.0e10; + const double BIGNI = 1.0e-10; + int j; double bi,bim,bip,tox,ans; double bessi0(double ); @@ -207,10 +203,6 @@ namespace AltMath } } -#undef ACC -#undef BIGNO -#undef BIGNI - double bessi0(double x) { double ax,ans; @@ -253,12 +245,12 @@ namespace AltMath return x < 0.0 ? -ans : ans; } -#define ACC 40.0 -#define BIGNO 1.0e10 -#define BIGNI 1.0e-10 - double jn_sph(int n, double x) { + const double ACC = 40.0; + const double BIGNO = 1.0e10; + const double BIGNI = 1.0e-10; + int j,m; double ax,bj,bjm,bjp,tox,ans; double jn_sph0(double x),jn_sph1(double x),jn_sphm1(double x); @@ -306,13 +298,10 @@ namespace AltMath return x < 0.0 && n%2 == 1 ? -ans : ans; } -#undef ACC -#undef BIGNO -#undef BIGNI - -#define TOL 1.0e-7 double jn_sph0(double x) { + const double TOL = 1.0e-7; + if (fabs(x) recomputing cache for HighFive API change" << std::endl; return false; } diff --git a/exputil/EmpCylSL.cc b/exputil/EmpCylSL.cc index 71135d576..9024c0e47 100644 --- a/exputil/EmpCylSL.cc +++ b/exputil/EmpCylSL.cc @@ -267,7 +267,7 @@ EmpCylSL::EmpCylSL(int mlim, std::string cachename) if (myid==0) std::cout << "---- EmpCylSL::ReadH5Cache: " << "using a workaround for a HighFive HDF5 wrapper bug. " - << "this workaround will be removed in EXP 7.9.0. " + << "this workaround will be removed in EXP 7.9.0. " << "Please consider rebuilding your cache if possible!" 
<< std::endl; } @@ -399,7 +399,7 @@ void EmpCylSL::reset(int numr, int lmax, int mmax, int nord, ortho = std::make_shared(make_sl(), LMAX, NMAX, NUMR, RMIN, RMAX*0.99, false, 1, 1.0); - orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL[SLGridSph]", "l"); + orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL [SLGridSph]", "l"); // Resize (should not be necessary) but just in case some future // feature changes mulitstep on the fly @@ -878,7 +878,7 @@ int EmpCylSL::read_eof_file(const string& eof_file) ortho = std::make_shared(make_sl(), LMAX, NMAX, NUMR, RMIN, RMAX*0.99, false, 1, 1.0); - orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL[SLGridSph]", "l"); + orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL [SLGridSph]", "l"); setup_eof(); setup_accumulation(); @@ -1450,7 +1450,7 @@ void EmpCylSL::compute_eof_grid(int request_id, int m) ortho = std::make_shared(make_sl(), LMAX, NMAX, NUMR, RMIN, RMAX*0.99, false, 1, 1.0); - orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL[SLGridSph]", "l"); + orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL [SLGridSph]", "l"); } @@ -1633,7 +1633,7 @@ void EmpCylSL::compute_even_odd(int request_id, int m) LMAX, NMAX, NUMR, RMIN, RMAX*0.99, false, 1, 1.0); - orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL[SLGridSph]", "l"); + orthoTest(ortho->orthoCheck(std::max(NMAX*50, 200)), "EmpCylSL [SLGridSph]", "l"); } double dens, potl, potr, pott; diff --git a/exputil/ParticleReader.cc b/exputil/ParticleReader.cc index f3aa05107..ea63c0400 100644 --- a/exputil/ParticleReader.cc +++ b/exputil/ParticleReader.cc @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,7 +8,11 @@ #include #include #include - +#include + // HighFive API +#include +#include +#include #include // YAML support @@ -35,6 +40,18 @@ namespace PR { _files = files; // Copy file list (bunch) _verbose = verbose; + // Do we have a directory of snapshot files? 
+ // + if (_files.size()==1) { + std::vector fscan = scanDirectory(_files[0]); + + // Did we find files? + // + if (fscan.size() != 0) { + _files = fscan; + } + } + ptype = 1; // Default is halo particles getNumbers(); // Get the number of particles in all @@ -305,6 +322,18 @@ namespace PR { _files = files; _verbose = verbose; + // Do we have a directory of snapshot files? + // + if (_files.size()==1) { + std::vector fscan = scanDirectory(_files[0]); + + // Did we find files? + // + if (fscan.size() != 0) { + _files = fscan; + } + } + ptype = 1; // Default is halo particles totalCount = 0; // Initialization of particles read @@ -313,7 +342,7 @@ namespace PR { curfile = _files.begin(); if (not nextFile()) { - std::cerr << "GadgetNative: no files found" << std::endl; + std::cerr << "GadgetHDF5: no files found" << std::endl; } } @@ -635,8 +664,579 @@ namespace PR { else return 0; } } + + std::vector + ParticleReader::scanDirectory(const std::string& dir) + { + // 'ret' will contain the filenames from the directory scan + std::vector ret; + + // The directory path + std::filesystem::path p(dir); + + // Check if the directory exists. If the directory does not + // exist, then 'ret' will be returned with zero length + if (std::filesystem::is_directory(p)) { + + // Iterate over the directory entries + for (const auto& entry : std::filesystem::directory_iterator(p)) { + + // Check if the entry is a regular file + if (std::filesystem::is_regular_file(entry.status())) { + + // Get the full path name + std::string filename = entry.path().string(); + + // Check if the last character is a digit, consistent with a + // partial phase-space write and not a backup or metadata + // file + if (std::isdigit(filename.back())) { + + // Assume that this is a snapshot file. We could check if + // this is HDF5 here but I'd rather that this procedure be + // file-type agnostic. 
+ ret.push_back(filename); + } + } + } + } + + return ret; + } + + + PSPhdf5::PSPhdf5(const std::vector& files, bool verbose) + { + _files = files; + _verbose = verbose; + + // Do we have a directory of snapshot files? + // + if (_files.size()==1) { + std::vector fscan = scanDirectory(_files[0]); + + // Did we find files? + // + if (fscan.size() != 0) { + _files = fscan; + + // Put the first file at the top of the list for metadata + // reading + std::partial_sort(_files.begin(), _files.begin()+1, _files.end()); + } + } + + // Read metadata from the first file + // + getInfo(); + + // Sanity check + // + if (nfiles != _files.size()) + throw GenericError("PSPhdf5: number of files does not match number expected for this snapshot", __FILE__, __LINE__, 1042, true); + + curfile = _files.begin(); + + if (not nextFile()) { + std::cerr << "PSPhdf5: no files found" << std::endl; + } + } + + void PSPhdf5::getInfo() + { + // Read one file and get metadata + // + try { + // Silence the HDF5 error stack + // + HighFive::SilenceHDF5 quiet; + + // Try opening the file + // + HighFive::File h5file(_files[0], HighFive::File::ReadOnly); + + try { + + HighFive::Group header = h5file.getGroup("Header"); + HighFive::Group config = h5file.getGroup("Config"); + HighFive::Group params = h5file.getGroup("Parameters"); + + // Get particle numbers + // + header.getAttribute("NumPart_Total").read(nptot); + totalCount = 0; + for (auto v : nptot) totalCount += v; + + // Get particle masses + // + header.getAttribute("MassTable").read(mass); + + // Number of files + // + header.getAttribute("NumFilesPerSnapshot").read(nfiles); + + // Get component names + // + params.getAttribute("ComponentNames").read(comps); + + // Set some defaults + // + curcomp = comps[0]; + curindx = 0; + + // Get packing type (PSP or Gadget4; PSP is default) + // + int style; + config.getAttribute("PSPstyle").read(style); + gadget4 = (style == 0); + + // Get number of types + // + 
config.getAttribute("NTYPES").read(ntypes); + + config.getAttribute("Niattrib").read(Niattrib); + + config.getAttribute("Ndattrib").read(Ndattrib); + + // Real data type + // + int dp; + config.getAttribute("DOUBLEPRECISION").read(dp); + if (dp) real4 = false; + else real4 = true; + + } catch (HighFive::Exception& err) { + std::string msg("PSPhdf5: error reading HDF5 file, "); + throw std::runtime_error(msg + err.what()); + } + + } catch (HighFive::Exception& err) { + if (myid==0) + std::cerr << "---- PSPhdf5: " + << "error opening as HDF5, trying EXP native and ascii table" + << std::endl; + } + } + + bool PSPhdf5::nextFile() + { + if (curfile==_files.end()) return false; + if (real4) read_and_load(); + else read_and_load(); + curfile++; + return true; + } + + + template + void PSPhdf5::read_and_load() + { + if (gadget4) read_and_load_gadget4(); + else read_and_load_psp(); + } + + template + void PSPhdf5::read_and_load_gadget4() + { + // Try to catch and HDF5 and parsing errors + // + try { + // Silence the HDF5 error stack + // + HighFive::SilenceHDF5 quiet; + + // Try opening the file + // + HighFive::File h5file(*curfile, HighFive::File::ReadOnly); + + try { + + HighFive::Group header = h5file.getGroup("Header"); + + // Get time + // + header.getAttribute("Time").read(time); + + // Get particle counts + // + header.getAttribute("NumPart_ThisFile").read(npart); + + if (npart[curindx] > 0) { + // Make the group name + // + std::ostringstream sout; + sout << "PartType" << curindx; + + // Open the phase-space group + // + HighFive::Group part = h5file.getGroup(sout.str()); + + Eigen::Matrix pos, vel; + std::vector idx; + std::vector mas, pot, potext; + + Eigen::Matrix iattrib; + Eigen::Matrix rattrib; + + part.getDataSet("ParticleIDs" ).read(idx); + + if (mass[curindx] == 0) + part.getDataSet("Masses").read(mas); + + part.getDataSet("Coordinates" ).read(pos); + part.getDataSet("Velocities" ).read(vel); + + part.getDataSet("Potential" ).read(pot); + 
part.getDataSet("PotentialExt").read(potext); + + if (Niattrib[curindx]>0) + part.getDataSet("IntAttributes" ).read(iattrib); + + if (Ndattrib[curindx]>0) + part.getDataSet("RealAttributes" ).read(rattrib); + + // Clear and load the particle vector + // + particles.clear(); + + Particle P; // Working particle will be copied + + // Load from the HDF5 datasets + // + for (int n=0; n 0) P.mass = mass[curindx]; + else P.mass = mas[n]; + + P.level = 0; + + for (int k=0; k<3; k++) { + P.pos[k] = pos(n, k); + P.vel[k] = vel(n, k); + } + + P.pot = pot[n]; + P.potext = potext[n]; + + if (Niattrib[curindx]>0) { + P.iattrib.resize(Niattrib[curindx]); + for (int j=0; j0) { + P.dattrib.resize(Ndattrib[curindx]); + for (int j=0; j + struct H5Particle + { + unsigned long id; + T mass; + T pos[3]; + T vel[3]; + T pot; + T potext; + hvl_t iattrib; + hvl_t dattrib; + }; + + // Read and return a scalar HDF5 attribute + template + T readScalar(H5::Group& group, const std::string& name) + { + H5::DataType type; + + // Create a int datatype + if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_INT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_ULONG; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_FLOAT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_DOUBLE; + } + else if constexpr (std::is_same_v) { + type = H5::StrType(0, H5T_VARIABLE); + } + else { + assert(0); + } + + // Create the attribute + H5::Attribute attribute = group.openAttribute(name); + + T ret; + + // Treat string separately + if constexpr (std::is_same_v) { + attribute.read(type, &ret); + } else + attribute.read(type, &ret); + + return ret; + }; + + + // Read and return a std::vector HDF5 attribute + template + std::vector readVector(H5::Group& group, const std::string& name) + { + H5::DataType type; + + // Create a int datatype + if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_INT; + } + else if constexpr 
(std::is_same_v) { + type = H5::PredType::NATIVE_ULONG; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_FLOAT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_DOUBLE; + } + else if constexpr (std::is_same_v) { + type = H5::StrType(0, H5T_VARIABLE); + } + else { + assert(0); + } + + // Create the attribute + H5::Attribute attribute = group.openAttribute(name); + H5::DataType attributeType = attribute.getDataType(); + hsize_t attributeSize = attribute.getSpace().getSimpleExtentNpoints(); + + // Create the return value + std::vector ret(attributeSize); + + + // Treat string separately + if constexpr (std::is_same_v) { + // Allocate memory for string pointers + std::vector stringPointers(attributeSize); + + // Read the string data + attribute.read(attributeType, stringPointers.data()); + + // Create std::vector + for (int i = 0; i < attributeSize; ++i) { + ret.emplace_back(stringPointers[i]); + } + + // Release memory allocated by HDF5 + H5::DataSet::vlenReclaim(stringPointers.data(), attributeType, attribute.getSpace()); + } else + attribute.read(attributeType, ret.data()); + + return ret; + }; + + + template + void PSPhdf5::read_and_load_psp() + { + // Try to catch and HDF5 and parsing errors + // + try { + // Try opening the file + // + H5::H5File file(*curfile, H5F_ACC_RDONLY); + + try { + + // Define the compound datatype + // + H5::CompType compound_type(sizeof(H5Particle)); + + H5::DataType type; + if constexpr (std::is_same_v) + type = H5::PredType::NATIVE_DOUBLE; + else if constexpr (std::is_same_v) + type = H5::PredType::NATIVE_FLOAT; + else assert(0); + + // Add members + compound_type.insertMember("id", HOFFSET(H5Particle, id), H5::PredType::NATIVE_INT); + compound_type.insertMember("mass", HOFFSET(H5Particle, mass), type); + + hsize_t dims3[1] = {3}; + H5::ArrayType array3_type(type, 1, dims3); + + compound_type.insertMember("pos", HOFFSET(H5Particle, pos ), array3_type); + compound_type.insertMember("vel", 
HOFFSET(H5Particle, vel ), array3_type); + compound_type.insertMember("pot", HOFFSET(H5Particle, pot ), type); + compound_type.insertMember("potext", HOFFSET(H5Particle, potext), type); + compound_type.insertMember("iattrib", HOFFSET(H5Particle, iattrib), H5::VarLenType(H5::PredType::NATIVE_INT)); + compound_type.insertMember("dattrib", HOFFSET(H5Particle, dattrib), H5::VarLenType(type)); + + // Open the particle group + // + H5::Group header = file.openGroup("Header"); + + // Get time + // + time = readScalar(header, "Time"); + + // Get particle counts + // + npart = readVector(header, "NumPart_ThisFile"); + + if (npart[curindx] > 0) { + // Make the group name + // + std::ostringstream sout; + sout << "PartType" << curindx; + + // Open the phase-space group + // + H5::Group part = file.openGroup(sout.str()); + + // Clear and load the particle vector + // + particles.clear(); + + // Working particle; will be copied to particles array + // + Particle P; + + // Get the particle dataset from HDF5 + // + H5::DataSet dataset = part.openDataSet("particles"); + H5::DataSpace dataspace = dataset.getSpace(); + + // Sanity check + // + hsize_t dims[1]; + dataspace.getSimpleExtentDims(dims); + size_t num_elements = dims[0]; + + if (num_elements != npart[curindx]) + throw std::runtime_error("PSPhdf5: number of particles in file does not match the number in header"); + + // Read particles + // + std::vector> h5part(num_elements); + dataset.read(h5part.data(), compound_type); + + // Load from the HDF5 dataset + // + for (int n=0; n0) { + P.iattrib.resize(Niattrib[curindx]); + // Access data by pointer + int* ptr = (int*)h5part[n].iattrib.p; + for (int j=0; j0) { + P.dattrib.resize(Ndattrib[curindx]); + // Access data by pointer + Scalar* ptr = (Scalar*)h5part[n].dattrib.p; + for (int j=0; j(); + template void PSPhdf5::read_and_load(); + template void PSPhdf5::read_and_load_gadget4(); + template void PSPhdf5::read_and_load_gadget4(); + template void PSPhdf5::read_and_load_psp(); 
+ template void PSPhdf5::read_and_load_psp(); + + const Particle* PSPhdf5::firstParticle() + { + pcount = 0; + + return & particles[pcount++]; + } + const Particle* PSPhdf5::nextParticle() + { + if (pcount < particles.size()) { + return & particles[pcount++]; + } else { + if (nextFile()) return firstParticle(); + else return 0; + } + } + + bool badstatus(istream& in) { ios::iostate i = in.rdstate(); @@ -1295,7 +1895,8 @@ namespace PR { std::vector ParticleReader::readerTypes - {"PSPout", "PSPspl", "GadgetNative", "GadgetHDF5", "TipsyNative", "TipsyXDR", "Bonsai"}; + {"PSPout", "PSPspl", "GadgetNative", "GadgetHDF5", "PSPhdf5", + "TipsyNative", "TipsyXDR", "Bonsai"}; std::vector> @@ -1315,6 +1916,28 @@ namespace PR { return parseStringList(files, delimit); } + // Are all the files in the list directories? + bool + ParticleReader::parseDirectoryList(const std::vector& files) + { + int dcount = 0; // The directory count + int fcount = 0; // The file count + + for (auto f : files) { + if (std::filesystem::is_directory(f)) + dcount++; + else + fcount++; + } + + if (dcount>0 and fcount>0) + throw std::runtime_error("ParticleReader::parseDirectoryList: " + "cannot mix directories and files"); + + return dcount>0; + } + + std::vector> ParticleReader::parseStringList (const std::vector& infiles, const std::string& delimit) @@ -1325,6 +1948,14 @@ namespace PR { auto files = infiles; std::sort(files.begin(), files.end()); + // Check to see if these are all directories + if (parseDirectoryList(files)) { + for (auto d : files) { + batches.push_back(std::vector{d}); + } + return batches; + } + std::vector batch; std::string templ; @@ -1359,6 +1990,9 @@ namespace PR { } } + // Final batch + if (batch.size()) batches.push_back(batch); + return batches; } @@ -1373,6 +2007,8 @@ namespace PR { ret = std::make_shared(file, verbose); else if (reader.find("PSPspl") == 0) ret = std::make_shared(file, verbose); + else if (reader.find("PSPhdf5") == 0) + ret = std::make_shared(file, 
verbose); else if (reader.find("GadgetNative") == 0) ret = std::make_shared(file, verbose); else if (reader.find("GadgetHDF5") == 0) @@ -1503,8 +2139,21 @@ namespace PR { Tipsy::Tipsy(const std::vector& filelist, TipsyType Type, bool verbose) { - ttype = Type; files = filelist; + + // Do we have a directory of snapshot files? + // + if (files.size()==1) { + std::vector fscan = scanDirectory(files[0]); + + // Did we find files? + // + if (fscan.size() != 0) { + files = fscan; + } + } + + ttype = Type; getNumbers(); curfile = files.begin(); if (not nextFile()) { diff --git a/exputil/massmodel_dist.cc b/exputil/massmodel_dist.cc index 27e2d877f..e6877b48a 100644 --- a/exputil/massmodel_dist.cc +++ b/exputil/massmodel_dist.cc @@ -49,14 +49,14 @@ #include #include -#define OFFSET 1.0e-3 -#define OFFTOL 1.2 +const double OFFSET=1.0e-3; +const double OFFTOL=1.2; extern double gint_0(double a, double b, std::function f, int NGauss); extern double gint_2(double a, double b, std::function f, int NGauss); -#define TSTEP 1.0e-8 -#define NGauss 96 +const double TSTEP=1.0e-8; +const int NGauss=96; static int DIVERGE=0; @@ -109,6 +109,10 @@ void SphericalModelTable::setup_df(int NUM, double RA) rhoQy.resize(num); rhoQy2.resize(num); + rhoQx. setZero(); + rhoQy. 
setZero(); + rhoQy2.setZero(); + for (int i=0; i::max(); + // The small value is not critical + const double FOFFSET0 = -1.0e-42; + foffset = FOFFSET0; + dfc.Q[dfc.num-1] = Qmax; dfc.ffQ[dfc.num-1] = 0.0; fac = 1.0/(sqrt(8.0)*M_PI*M_PI); @@ -184,6 +196,13 @@ void SphericalModelTable::setup_df(int NUM, double RA) df.ffQ .resize(NUM); df.fQ2 .resize(NUM); df.ffQ2.resize(NUM); + + df.Q .setZero(); + df.fQ .setZero(); + df.ffQ .setZero(); + df.fQ2 .setZero(); + df.ffQ2.setZero(); + df.num = NUM; df.ra2 = ra2; @@ -194,7 +213,8 @@ void SphericalModelTable::setup_df(int NUM, double RA) df.Q[df.num-1] = Qmax; df.ffQ[df.num-1] = 0.0; fac = 1.0/(sqrt(8.0)*M_PI*M_PI); - foffset = -std::numeric_limits::max(); + // foffset = -std::numeric_limits::max(); + foffset = -1.0e42; for (int i=df.num-2; i>=0; i--) { df.Q[i] = df.Q[i+1] - dQ; Q = df.Q[i]; @@ -233,7 +253,7 @@ void SphericalModelTable::setup_df(int NUM, double RA) dist_defined = true; - debug_fdist(); + // debug_fdist(); } @@ -294,7 +314,7 @@ double SphericalModelTable::distf(double E, double L) if (!dist_defined) bomb("distribution function not defined"); - double d, g; + double d=0.0, g=0.0; if (chebyN) { @@ -341,7 +361,7 @@ double SphericalModelTable::dfde(double E, double L) if (!dist_defined) bomb("distribution function not defined"); - double d, g, h, d1; + double d=0, g=0, h=0, d1=0; if (chebyN) { @@ -399,7 +419,7 @@ double SphericalModelTable::dfdl(double E, double L) if (!dist_defined) bomb("distribution function not defined"); - double d, g, h, d1; + double d=0, g=0, h=0, d1=0; if (chebyN) { @@ -451,7 +471,7 @@ double SphericalModelTable::d2fde2(double E, double L) { if (!dist_defined) bomb("distribution function not defined"); - double d, g, h, k, d2; + double d=0, g=0, h=0, k=0, d2=0; if (chebyN) { diff --git a/exputil/orthoTest.cc b/exputil/orthoTest.cc index 99fcb4f2e..9cfb8c82e 100644 --- a/exputil/orthoTest.cc +++ b/exputil/orthoTest.cc @@ -81,6 +81,6 @@ void orthoTest(const std::vector& tests, } else { // 
Success message if (myid==0) - std::cout << classname + ": biorthogonal check passed" << std::endl; + std::cout << "---- " << classname + ": biorthogonal check passed" << std::endl; } } diff --git a/exputil/realize_model.cc b/exputil/realize_model.cc index 0adfc7b0a..14ffc230c 100644 --- a/exputil/realize_model.cc +++ b/exputil/realize_model.cc @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -69,12 +70,10 @@ int AxiSymModel::gen_itmax = 20000; const bool verbose = true; const double ftol = 0.01; -Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) +AxiSymModel::PSret AxiSymModel::gen_point_2d() { if (!dist_defined) { - std::cerr << "AxiSymModel: must define distribution before realizing!" - << std::endl; - exit (-1); + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); } int it; // Iteration counter @@ -87,7 +86,6 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) double Emin = get_pot(rmin); double Emax = get_pot(get_max_radius()); - if (gen_EJ) { if (gen_firstime) { @@ -97,8 +95,9 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) double dx = (Emax - Emin - 2.0*tol)/(numr-1); double dy = (1.0 - gen_kmin - 2.0*tol)/(numj-1); +#ifdef DEBUG std::cout << "gen_point_2d[" << ModelID << "]: " << get_max_radius() << std::endl; - +#endif gen_fomax = 0.0; for (int j=0; jgen_fomax ? zzz : gen_fomax; + double zzz = distf(xxx, gen_orb.AngMom())*gen_orb.Jmax()/gen_orb.get_freq(1); + + if (zzz>gen_fomax) gen_fomax = zzz; } } @@ -119,8 +118,8 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) } - // Trial velocity point - + // Trial velocity point + // for (it=0; itfmax ? 
zzz : fmax; + if (zzz > fmax) fmax = zzz; } } gen_fmax[i] = fmax*(1.0 + ftol); @@ -219,8 +220,8 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) pot = get_pot(r); vmax = sqrt(2.0*fabs(Emax - pot)); - // Trial velocity point - + // Trial velocity point + // for (it=0; it distf(eee, r*vt)/fmax ) continue; - if (Unit(random_gen)<0.5) vr *= -1.0; + if (Unit(random_gen) < 0.5) vr *= -1.0; phi = 2.0*M_PI*Unit(random_gen); @@ -254,13 +255,10 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) << std::setw(12) << fmax << " %=" << std::setw(12) << static_cast(++toomany)/totcnt << std::endl; - ierr = 1; out.setZero(); - return out; + return {out, 1}; } - ierr = 0; - cosp = cos(phi); sinp = sin(phi); @@ -272,16 +270,14 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(int& ierr) out[5] = vr * sinp + vt * cosp; out[6] = 0.0; - return out; + return {out, 0}; } -Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) +AxiSymModel::PSret AxiSymModel::gen_point_2d(double r) { if (!dist_defined) { - std::cerr << "AxiSymModel: must define distribution before realizing!" - << std::endl; - exit (-1); + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); } int it; // Iteration counter @@ -300,8 +296,10 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) gen_rloc.resize(gen_N); gen_fmax.resize(gen_N); +#ifdef DEBUG std::cout << "gen_point_2d[" << ModelID << "]: " << rmin << ", " << get_max_radius() << std::endl; +#endif double dr = (get_max_radius() - rmin)/gen_N; @@ -324,7 +322,7 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) eee = pot + 0.5*(vr*vr + vt*vt); double zzz = distf(eee, gen_rloc[i]*vt); - fmax = zzz>fmax ? 
zzz : fmax; + if (zzz>fmax) fmax = zzz; } } gen_fmax[i] = fmax*(1.0 + ftol); @@ -337,8 +335,8 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) pot = get_pot(r); vmax = sqrt(2.0*fabs(Emax - pot)); - // Trial velocity point - + // Trial velocity point + // for (it=0; it distf(eee, r*vt)/fmax ) continue; - if (Unit(random_gen)<0.5) vr *= -1.0; + if (Unit(random_gen) < 0.5) vr *= -1.0; phi = 2.0*M_PI*Unit(random_gen); @@ -370,13 +368,10 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) << std::setw(12) << fmax << " %=" << std::setw(12) << static_cast(++toomany)/totcnt << std::endl; - ierr = 1; out.setZero(); - return out; + return {out, 1}; } - ierr = 0; - cosp = cos(phi); sinp = sin(phi); @@ -388,16 +383,14 @@ Eigen::VectorXd AxiSymModel::gen_point_2d(double r, int& ierr) out[5] = vr * sinp + vt * cosp; out[6] = 0.0; - return out; + return {out, 0}; } -Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) +AxiSymModel::PSret AxiSymModel::gen_point_3d() { if (!dist_defined) { - std::cerr << "AxiSymModel: must define distribution before realizing!" - << std::endl; - exit (-1); + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); } #ifdef DEBUG @@ -405,7 +398,7 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) #endif double r, pot, vmax, vr=0.0, vt, eee, vt1=0.0, vt2=0.0, fmax; - double phi, sint, cost, sinp, cosp, azi; + double phi, sint, cost, sinp, cosp; double rmin = max(get_min_radius(), gen_rmin); double Emax = get_pot(get_max_radius()); @@ -461,7 +454,7 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) eee = pot + 0.5*(vr*vr + vt*vt); double zzz = distf(eee, r*vt); - fmax = zzz>fmax ? 
zzz : fmax; + if (zzz>fmax) fmax = zzz; } } gen_fmax[i] = fmax*(1.0 + ftol); @@ -525,7 +518,9 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) if (Unit(random_gen)<0.5) vr *= -1.0; - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -544,13 +539,10 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) << std::setw(12) << fmax << " %=" << std::setw(12) << static_cast(++toomany)/totcnt << std::endl; - ierr = 1; out.setZero(); - return out; + return {out, 1}; } - ierr = 0; - if (Unit(random_gen)>=0.5) vr *= -1.0; phi = 2.0*M_PI*Unit(random_gen); @@ -583,17 +575,284 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(int& ierr) #endif - return out; + return {out, 0}; } -Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, - double Kmin, double Kmax, int& ierr) +AxiSymModel::PSret AxiSymModel::gen_point_3d(double r, double theta, double phi) { if (!dist_defined) { - std::cerr << "AxiSymModel: must define distribution before realizing!" 
- << std::endl; - exit (-1); + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); + } + + int it; // Iteration counter + double pot, vmax, vr=0.0, vt=0.0, eee, fmax; + double rmin = max(get_min_radius(), gen_rmin); + double Emax = get_pot(get_max_radius()); + + if (gen_firstime) { + + double tol = 1.0e-5; + double dx = (1.0 - 2.0*tol)/(numr-1); + double dy = (1.0 - 2.0*tol)/(numj-1); + + gen_mass.resize(gen_N); + gen_rloc.resize(gen_N); + gen_fmax.resize(gen_N); + +#ifdef DEBUG + std::cout << "gen_point_3d[" << ModelID << "]: " << rmin + << ", " << get_max_radius() << std::endl; +#endif + + double dr = (get_max_radius() - rmin)/gen_N; + + for (int i=0; ifmax) fmax = zzz; + } + } + gen_fmax[i] = fmax*(1.0 + ftol); + + } + gen_firstime = false; + + std::ofstream out("gen_point_3d.dat"); + if (out) { + out << "# gen_point_3d[" << ModelID << "]" << std::endl + << "# " << std::setw(14) << "r" << std::setw(16) << "mass" + << std::endl; + for (int i=0; i distf(eee, r*vt)/fmax ) continue; + + if (Unit(random_gen)<0.5) vr *= -1.0; + + break; + } + + Eigen::VectorXd out(7); + + if (std::isnan(vr) or std::isnan(vt)) { + if (verbose) std::cout << "NaN found in AxiSymModel::gen_point_3d with r=" + << r << " theta=" << theta << " phi=" << phi + << std::endl; + out.setZero(); + return {out, 1}; + } + + static unsigned totcnt = 0, toomany = 0; + totcnt++; + + if (it==gen_itmax) { + if (verbose) + std::cerr << "Velocity selection failed [" << myid << "]: r=" + << std::setw(12) << r + << std::setw(12) << fmax + << " %=" << std::setw(12) + << static_cast(++toomany)/totcnt << std::endl; + out.setZero(); + return {out, 1}; + } + + double tv = 2.0*M_PI*Unit(random_gen); + double vth = vt*cos(tv); + double vph = vt*sin(tv); + + double cost = cos(theta); + double sint = sin(theta); + + double cosp = cos(phi); + double sinp = sin(phi); + + out[0] = 1.0; + out[1] = r*sint*cosp; + out[2] = r*sint*sinp; + out[3] = r*cost; + out[4] = vr*sint*cosp - vph*sinp + 
vth*cost*cosp; + out[5] = vr*sint*sinp + vph*cosp + vth*cost*sinp; + out[6] = vr*cost - vth*sint; + + return {out, 0}; +} + +AxiSymModel::PSret AxiSymModel::gen_point_3d_iso +(double r, double theta, double phi, int N, int nv, int na, + double PHI, double THETA, double PSI) +{ + if (!dist_defined) { + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); + } + + double Emax = get_pot(get_max_radius()); + double vpot = get_pot(r); + double v3 = pow(2.0*fabs(Emax - vpot), 1.5); + int knots = 20; + + // Sanity check + assert((N < nv*na)); + + // Rotation matrix + static Eigen::Matrix3d rotate; + + // Assume isotropic and generate new r slice + if (fabs(gen_lastr-r) > 1.0e-14) { + gen_mass.resize(gen_N+1); + gen_rloc.resize(gen_N+1); + gen_lastr = r; + +#ifdef DEBUG + std::cout << "gen_point_3d_iso[" << ModelID << "]: " << rmin + << ", " << get_max_radius() << std::endl; +#endif + + LegeQuad lv(knots); + + double dv3 = v3/gen_N; + + gen_rloc[0] = 0.0; + gen_mass[0] = 0.0; + for (int i=0; i(get_min_radius(), gen_rmin); double rmax = get_max_radius(); @@ -688,7 +947,8 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, #endif } - // Enforce limits + // Enforce limits + // Emin = max(Emin, Emin_grid); Emin = min(Emin, Emax_grid); Emax = max(Emax, Emin_grid); @@ -733,9 +993,10 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, pot = get_pot(r); vt = J/r; - ierr = 0; - // Interpolation check (should be rare) - // Error condition is set + int ierr = 0; + + // Interpolation check (should be rare). Error condition is set. 
+ // if (2.0*(E - pot) - vt*vt < 0.0) { ierr = 1; if (E < pot) E = pot; @@ -745,7 +1006,9 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, if (Unit(random_gen)<0.5) vr *= -1.0; - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -767,7 +1030,7 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, #ifdef DEBUG - if (ierr) return out; + if (ierr) return {out, ierr}; eee = pot + 0.5*(out[4]*out[4]+out[5]*out[5]+out[6]*out[6]); orb.new_orbit(eee, 0.5); @@ -787,15 +1050,15 @@ Eigen::VectorXd AxiSymModel::gen_point_3d(double Emin, double Emax, << std::endl; #endif - return out; + return {out, ierr}; } -Eigen::VectorXd AxiSymModel::gen_point_jeans_3d(int& ierr) +AxiSymModel::PSret AxiSymModel::gen_point_jeans_3d() { double r, d, vr, vt, vt1, vt2, vv, vtot; - double phi, sint, cost, sinp, cosp, azi; + double phi, sint, cost, sinp, cosp; double rmin = max(get_min_radius(), gen_rmin); if (gen_firstime_jeans) { @@ -838,8 +1101,8 @@ Eigen::VectorXd AxiSymModel::gen_point_jeans_3d(int& ierr) // Debug - - ofstream test("test.grid"); + // + std::ofstream test("test.grid"); if (test) { test << "# [Jeans] Rmin=" << rmin @@ -885,7 +1148,9 @@ Eigen::VectorXd AxiSymModel::gen_point_jeans_3d(int& ierr) vr = vtot*xxx; vt = vtot*sqrt(yyy); - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -907,24 +1172,20 @@ Eigen::VectorXd AxiSymModel::gen_point_jeans_3d(int& ierr) out[5] = vr * sint*sinp + vt1 * cost*sinp + vt2*cosp; out[6] = vr * cost - vt1 * sint; - ierr = 0; - - return out; + return {out, 0}; } -void AxiSymModel::gen_velocity(double* pos, double* vel, int& ierr) +int AxiSymModel::gen_velocity(double* pos, double* vel) { if (dof()!=3) bomb( "AxiSymModel: gen_velocity only implemented for 3d model!" 
); - + if (!dist_defined) { - std::cerr << "AxiSymModel: must define distribution before realizing!" - << std::endl; - exit (-1); + throw std::runtime_error("AxiSymModel: must define distribution before realizing!"); } double r, pot, vmax, vr=0.0, vt, eee, vt1=0.0, vt2=0.0, fmax; - double phi, sint, cost, sinp, cosp, azi; + double phi, sint, cost, sinp, cosp; double rmin = max(get_min_radius(), gen_rmin); double Emax = get_pot(get_max_radius()); @@ -976,7 +1237,7 @@ void AxiSymModel::gen_velocity(double* pos, double* vel, int& ierr) eee = pot + 0.5*(vr*vr + vt*vt); double zzz = distf(eee, r*vt); - fmax = zzz>fmax ? zzz : fmax; + if (zzz>fmax) fmax = zzz; } } gen_fmax[i] = fmax*(1.0 + ftol); @@ -1021,7 +1282,9 @@ void AxiSymModel::gen_velocity(double* pos, double* vel, int& ierr) if (Unit(random_gen)<0.5) vr *= -1.0; - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -1038,12 +1301,9 @@ void AxiSymModel::gen_velocity(double* pos, double* vel, int& ierr) << std::setw(12) << fmax << " %=" << std::setw(12) << static_cast(++toomany)/totcnt << std::endl; - ierr = 1; - return; + return 1; } - ierr = 0; - if (Unit(random_gen)>=0.5) vr *= -1.0; phi = atan2(pos[1], pos[0]); @@ -1055,18 +1315,19 @@ void AxiSymModel::gen_velocity(double* pos, double* vel, int& ierr) vel[0] = vr * sint*cosp + vt1 * cost*cosp - vt2*sinp; vel[1] = vr * sint*sinp + vt1 * cost*sinp + vt2*cosp; vel[2] = vr * cost - vt1 * sint; + + return 0; } -Eigen::VectorXd SphericalModelMulti::gen_point(int& ierr) +AxiSymModel::PSret SphericalModelMulti::gen_point() { if (!real->dist_defined || !fake->dist_defined) { - std::cerr << "SphericalModelMulti: input distribution functions must be defined before realizing!" 
<< std::endl; - exit (-1); + throw std::runtime_error("SphericalModelMulti: input distribution functions must be defined before realizing!"); } double r, pot, vmax; double vr=0.0, vt=0.0, eee=0.0, vt1=0.0, vt2=0.0, fmax, emax; - double mass, phi, sint, cost, sinp, cosp, azi; + double mass, phi, sint, cost, sinp, cosp; double Emax = get_pot(get_max_radius()); @@ -1271,6 +1532,7 @@ Eigen::VectorXd SphericalModelMulti::gen_point(int& ierr) for (it=0; itdistf(eee, r*vt); - fmax = zzz>fmax ? zzz : fmax; + + if (zzz>fmax) fmax = zzz; } } gen_fmax[i] = fmax*(1.0 + ftol); @@ -1500,7 +1760,9 @@ Eigen::VectorXd SphericalModelMulti::gen_point(double radius, int& ierr) if (Unit(random_gen)<0.5) vr *= -1.0; - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of the tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -1519,20 +1781,17 @@ Eigen::VectorXd SphericalModelMulti::gen_point(double radius, int& ierr) << " reject=" << std::setw( 6) << reject << std::endl; } - ierr = 1; out.setZero(); - return out; + return {out, 1}; } - ierr = 0; - if (Unit(random_gen)>=0.5) vr *= -1.0; - phi = 2.0*M_PI*Unit(random_gen); - cost = 2.0*(Unit(random_gen) - 0.5); - sint = sqrt(1.0 - cost*cost); - cosp = cos(phi); - sinp = sin(phi); + double phi = 2.0*M_PI*Unit(random_gen); + double cost = 2.0*(Unit(random_gen) - 0.5); + double sint = sqrt(1.0 - cost*cost); + double cosp = cos(phi); + double sinp = sin(phi); double dfr = real->distf(eee, r*vt); double dfn = fake->distf(eee, r*vt); @@ -1560,25 +1819,206 @@ Eigen::VectorXd SphericalModelMulti::gen_point(double radius, int& ierr) out[5] = vr * sint*sinp + vt1 * cost*sinp + vt2*cosp; out[6] = vr * cost - vt1 * sint; + int ierr = 0; for (int i=0; i<7; i++) { if (std::isnan(out[i]) || std::isinf(out[i])) ierr = 1; } - return out; + return {out, ierr}; } +AxiSymModel::PSret +SphericalModelMulti::gen_point(double radius, double theta, double phi) +{ + if (!real->dist_defined || 
!fake->dist_defined) { + throw std::runtime_error("SphericalModelMulti: input distribution functions must be defined before realizing!"); + } + + double r, pot, vmax, fmax; + double vr=0.0, vt=0.0, eee=0.0, vt1=0.0, vt2=0.0; + + double Emax = get_pot(get_max_radius()); + + if (gen_firstime) { + double tol = 1.0e-5; + double dx = (1.0 - 2.0*tol)/(numr-1); + double dy = (1.0 - 2.0*tol)/(numj-1); + double dr; + + gen_mass.resize(gen_N); + gen_rloc.resize(gen_N); + gen_fmax.resize(gen_N); + + if (rmin_gen <= 1.0e-16) gen_logr = 0; + + if (gen_logr) + dr = (log(rmax_gen) - log(rmin_gen))/(gen_N-1); + else + dr = (rmax_gen - rmin_gen)/(gen_N-1); + + for (int i=0; iget_mass(r); + + pot = get_pot(r); + vmax = sqrt(2.0*fabs(Emax - pot)); + + fmax = 0.0; + for (int j=0; jdistf(eee, r*vt); + + if (zzz>fmax) fmax = zzz; + } + } + gen_fmax[i] = fmax*(1.0 + ftol); + + } + + // Debug + // + ofstream test("test_multi.grid"); + if (test) { + + test << "# Rmin=" << rmin_gen + << " Rmax=" << rmax_gen + << std::endl; + + for (int i=0; idistf(get_pot(exp(gen_rloc[i])), 0.5) + << std::endl; + } + } -Eigen::VectorXd - SphericalModelMulti::gen_point(double Emin, double Emax, double Kmin, - double Kmax, int& ierr) + gen_firstime = false; + } + + r = max(rmin_gen, min(rmax_gen, radius)); + fmax = odd2(r, gen_rloc, gen_fmax, 1); + if (gen_logr) r = exp(r); + + pot = get_pot(r); + vmax = sqrt(2.0*max(Emax - pot, 0.0)); + + // Trial velocity point + // + // Diagnostic counters + int reject=0; + int it; // Iteration counter + for (it=0; it fake->distf(eee, r*vt)/fmax ) { + reject++; + continue; + } + + if (Unit(random_gen)<0.5) vr *= -1.0; + + // Orientation of tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); + vt1 = vt*cos(azi); + vt2 = vt*sin(azi); + + break; + } + + Eigen::VectorXd out(7); + + if (it==gen_itmax) { + if (verbose) { + static unsigned totcnt = 0; + std::cerr << "Velocity selection failed [" << std::setw(7) << ++totcnt + << "," << std::setw(4) << myid << 
"]: r=" + << std::setw(12) << r + << std::setw(12) << fmax + << " reject=" << std::setw( 6) << reject + << std::endl; + } + out.setZero(); + return {out, 1}; + } + + if (Unit(random_gen)>=0.5) vr *= -1.0; + + double dfr = real->distf(eee, r*vt); + double dfn = fake->distf(eee, r*vt); + double rat = dfr/dfn; + + // Deep debug + // +#ifdef DEBUG + if (rat <= 0.0) { + std::cout << "[" << std::setw(3) << myid << "] Bad mass: rat=" + << std::setw(16) << rat + << " df(M)=" << std::setw(16) << dfr + << " df(N)=" << std::setw(16) << dfn + << " r=" << std::setw(16) << r + << " E=" << std::setw(16) << eee + << std::endl; + } +#endif + + double cost = cos(theta); + double sint = sin(theta); + double cosp = cos(phi); + double sinp = sin(phi); + + out[0] = rat; + out[1] = r * sint*cosp; + out[2] = r * sint*sinp; + out[3] = r * cost; + out[4] = vr * sint*cosp + vt1 * cost*cosp - vt2*sinp; + out[5] = vr * sint*sinp + vt1 * cost*sinp + vt2*cosp; + out[6] = vr * cost - vt1 * sint; + + int ierr = 0; + for (int i=0; i<7; i++) { + if (std::isnan(out[i]) || std::isinf(out[i])) ierr = 1; + } + + return {out, ierr}; +} + +AxiSymModel::PSret SphericalModelMulti::gen_point +(double Emin, double Emax, double Kmin, double Kmax) { if (!real->dist_defined || !fake->dist_defined) { - std::cerr << "SphericalModelMulti: input distribution functions must be defined before realizing!" 
<< std::endl; - exit (-1); + throw std::runtime_error("SphericalModelMulti: input distribution functions must be defined before realizing!"); } double r, vr, vt, vt1, vt2, E, K, J, jmax, w1t, pot; - double phi, sint, cost, sinp, cosp, azi; + double phi, sint, cost, sinp, cosp; double rmin = max(get_min_radius(), gen_rmin); double rmax = get_max_radius(); @@ -1619,7 +2059,9 @@ Eigen::VectorXd vector wrvec; for (int j=0; j(Emin, Emin_grid); Emin = min(Emin, Emax_grid); Emax = max(Emax, Emin_grid); @@ -1714,9 +2157,10 @@ Eigen::VectorXd pot = get_pot(r); vt = J/r; - ierr = 0; - // Interpolation check (should be rare) - // Error condition is set + + // Interpolation check (should be rare). Error condition is set. + // + int ierr = 0; if (2.0*(E - pot) - vt*vt < 0.0) { ierr = 1; if (E < pot) E = pot; @@ -1726,7 +2170,9 @@ Eigen::VectorXd if (Unit(random_gen)<0.5) vr *= -1.0; - azi = 2.0*M_PI*Unit(random_gen); + // Orientation of the tangential velocity vector + // + double azi = 2.0*M_PI*Unit(random_gen); vt1 = vt*cos(azi); vt2 = vt*sin(azi); @@ -1750,7 +2196,7 @@ Eigen::VectorXd if (std::isnan(out[i]) || std::isinf(out[i])) ierr = 1; } - return out; + return {out, ierr}; } diff --git a/include/DiskDensityFunc.H b/include/DiskDensityFunc.H index 0966f0db8..c0e4a344d 100644 --- a/include/DiskDensityFunc.H +++ b/include/DiskDensityFunc.H @@ -3,6 +3,8 @@ #include +#include + #include #include @@ -35,8 +37,10 @@ DiskDensityFunc { private: +#ifdef HAVE_PYTHON3 //! The disk-density function pybind11::function disk_density; +#endif //! Interpreter started? bool started = false; diff --git a/include/ParticleReader.H b/include/ParticleReader.H index 3ae70b174..0d150dc45 100644 --- a/include/ParticleReader.H +++ b/include/ParticleReader.H @@ -36,6 +36,13 @@ namespace PR int numprocs, myid; bool use_mpi; + //! Check for a list of directories or files and not both + static bool parseDirectoryList(const std::vector& files); + + //! 
Scan a directory for partial phase-space writes ending in a + //! integer index + static std::vector scanDirectory(const std::string& dir); + public: //! Constructor: check for and set up MPI @@ -227,6 +234,90 @@ namespace PR }; + class PSPhdf5 : public ParticleReader + { + protected: + + //@{ + //! Parameters + double time; + std::vector particles; + bool _verbose; + std::vector _files; + + std::vector mass; + std::vector Niattrib, Ndattrib; + std::vector npart, nptot; + unsigned long totalCount; + bool real4, gadget4; + int ntypes, nfiles; + //@} + + //! Current file + std::vector::iterator curfile; + + //! Components + std::vector comps; + + //@{ + //! Current component and index + std::string curcomp; + int curindx; + //@} + + unsigned pcount; + template void read_and_load(); + template void read_and_load_gadget4(); + template void read_and_load_psp(); + + void getInfo(); + void packParticle(); + bool nextFile(); + + public: + + //! Constructor + PSPhdf5(const std::vector& file, bool verbose=false); + + //! Select a particular particle type and reset the iterator + virtual void SelectType(const std::string& type) { + auto it = std::find(comps.begin(), comps.end(), type); + if (it != comps.end()) { + curcomp = type; + curindx = it - comps.begin(); + } + else { + std::cerr << "PSPhdf5 error: could not find particle component <" << type << ">" << std::endl; + std::cerr << "Available particle components are:"; + for (auto s : comps) std::cerr << " " << s; + std::cerr << std::endl; + throw std::runtime_error("PSPhdf5: non-existent component"); + } + + curfile = _files.begin(); // Set to first file and open + nextFile(); + } + + //! Number of particles in the chosen type + virtual unsigned long CurrentNumber() { return totalCount; } + + //! Return list of particle types + virtual std::vector GetTypes() { return comps; } + + //! Get current time + virtual double CurrentTime() { return time; } + + //! 
Reset to beginning of particles for this component + virtual const Particle* firstParticle(); + + //! Get the next particle + virtual const Particle* nextParticle(); + + //! Get file cound per snapshot + virtual int NumFiles() { return nfiles; } + + }; + class PSPstanza { public: diff --git a/include/cudaParticle.cuH b/include/cudaParticle.cuH index 8673f0462..5147b01dd 100644 --- a/include/cudaParticle.cuH +++ b/include/cudaParticle.cuH @@ -90,8 +90,12 @@ extern __host__ std::ostream& operator<< (std::ostream& os, const cudaParticle& p); //! Functor for extracting level info from cudaParticle structures +#if CUDART_VERSION < 12040 struct cuPartToLevel : public thrust::unary_function +#else +struct cuPartToLevel +#endif { int _t; @@ -106,8 +110,12 @@ struct cuPartToLevel : }; //! Functor for extracting change level info from cudaParticle structures +#if CUDART_VERSION < 12040 struct cuPartToChange : public thrust::unary_function> +#else +struct cuPartToChange +#endif { __host__ __device__ thrust::pair operator()(const cudaParticle& p) const diff --git a/include/massmodel.H b/include/massmodel.H index 88939322d..233e2f257 100644 --- a/include/massmodel.H +++ b/include/massmodel.H @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -111,6 +112,10 @@ public: class AxiSymModel : public MassModel, public std::enable_shared_from_this { +public: + + using PSret = std::tuple; + protected: //@{ //! 
Stuff for gen_point @@ -123,14 +128,18 @@ protected: bool gen_firstime_jeans; Eigen::VectorXd gen_rloc, gen_mass, gen_fmax; SphericalOrbit gen_orb; - double gen_fomax, gen_ecut; + double gen_fomax, gen_ecut, gen_lastr=-1.0; //@} - Eigen::VectorXd gen_point_2d(int& ierr); - Eigen::VectorXd gen_point_2d(double r, int& ierr); - Eigen::VectorXd gen_point_3d(int& ierr); - Eigen::VectorXd gen_point_3d(double Emin, double Emax, double Kmin, double Kmax, int& ierr); - Eigen::VectorXd gen_point_jeans_3d(int& ierr); + PSret gen_point_2d(); + PSret gen_point_2d(double r); + PSret gen_point_3d(); + PSret gen_point_3d(double Emin, double Emax, double Kmin, double Kmax); + PSret gen_point_jeans_3d(); + PSret gen_point_3d(double r, double theta, double phi); + PSret gen_point_3d_iso(double r, double theta, double phi, + int N, int nv, int na, + double PHI=0, double THETA=0, double PSI=0); double Emin_grid, Emax_grid, dEgrid, dKgrid; vector Egrid, Kgrid, EgridMass; @@ -212,55 +221,89 @@ public: void set_Ecut(double cut) { gen_ecut = cut; } //! Generate a phase-space point - virtual Eigen::VectorXd gen_point(int& ierr) { + virtual PSret gen_point() + { if (dof()==2) - return gen_point_2d(ierr); + return gen_point_2d(); else if (dof()==3) - return gen_point_3d(ierr); + return gen_point_3d(); else bomb( "dof must be 2 or 3" ); - return Eigen::VectorXd(); + return {Eigen::VectorXd(), 1}; } //! Generate a phase-space point using Jeans' equations - virtual Eigen::VectorXd gen_point_jeans(int& ierr) { + virtual PSret gen_point_jeans() + { if (dof()==2) bomb( "AxiSymModel: gen_point_jeans_2d(ierr) not implemented!" ); else if (dof()==3) - return gen_point_jeans_3d(ierr); + return gen_point_jeans_3d(); else bomb( "dof must be 2 or 3" ); - return Eigen::VectorXd(); + return {Eigen::VectorXd(), 1}; } //! 
Generate a phase-space point at a particular radius - virtual Eigen::VectorXd gen_point(double r, int& ierr) { + virtual PSret gen_point(double r) { + if (dof()==2) + return gen_point_2d(r); + else if (dof()==3) + bomb( "AxiSymModel: gen_point(r) not implemented!" ); + else + bomb( "AxiSymModel: dof must be 2 or 3" ); + + return {Eigen::VectorXd(), 1}; + } + + //! Generate a phase-space point at a particular radius, theta, and phi + virtual PSret + gen_point(double r, double theta, double phi) + { if (dof()==2) - return gen_point_2d(r, ierr); + bomb( "AxiSymModel: gen_point(r, theta, phi) not implemented!" ); else if (dof()==3) - bomb( "AxiSymModel: gen_point(r, ierr) not implemented!" ); + return gen_point_3d(r, theta, phi); else bomb( "AxiSymModel: dof must be 2 or 3" ); - return Eigen::VectorXd(); + return {Eigen::VectorXd(), 1}; + } + + //! Generate a phase-space point at a particular radius, theta, and phi + virtual PSret + gen_point_iso(double r, double theta, double phi, int N, int nv, int na, + double PHI, double THETA, double PSI) + { + if (dof()==2) + bomb( "AxiSymModel: gen_point(r, theta, phi) not implemented!" ); + else if (dof()==3) + return gen_point_3d_iso(r, theta, phi, N, nv, na, + PHI, THETA, PSI); + else + bomb( "AxiSymModel: dof must be 2 or 3" ); + + return {Eigen::VectorXd(), 1}; } //! Generate a phase-space point at a particular energy and angular momentum - virtual Eigen::VectorXd gen_point(double Emin, double Emax, double Kmin, double Kmax, int& ierr) { + virtual PSret + gen_point(double Emin, double Emax, double Kmin, double Kmax) + { if (dof()==2) - bomb( "AxiSymModel: gen_point(r, ierr) not implemented!" ); + bomb( "AxiSymModel: gen_point(r) not implemented!" ); else if (dof()==3) - return gen_point_3d(Emin, Emax, Kmin, Kmax, ierr); + return gen_point_3d(Emin, Emax, Kmin, Kmax); else bomb( "AxiSymModel: dof must be 2 or 3" ); - return Eigen::VectorXd(); + return {Eigen::VectorXd(), 1}; } //! 
Generate a the velocity variate from a position - virtual void gen_velocity(double *pos, double *vel, int& ierr); + virtual int gen_velocity(double *pos, double *vel); }; @@ -501,9 +544,10 @@ public: //@{ //! Overloaded to provide mass distribution from Real and Number distribution from Fake - Eigen::VectorXd gen_point(int& ierr); - Eigen::VectorXd gen_point(double r, int& ierr); - Eigen::VectorXd gen_point(double Emin, double Emax, double Kmin, double Kmax, int& ierr); + PSret gen_point(); + PSret gen_point(double r); + PSret gen_point(double Emin, double Emax, double Kmin, double Kmax); + PSret gen_point(double r, double theta, double phi); //@} diff --git a/pyEXP/BasisWrappers.cc b/pyEXP/BasisWrappers.cc index bb214cf20..58042c8fb 100644 --- a/pyEXP/BasisWrappers.cc +++ b/pyEXP/BasisWrappers.cc @@ -199,6 +199,18 @@ void BasisFactoryClasses(py::module &m) a fixed potential model. AccelFunc can be inherited by a native Python class and the evalcoefs() may be implemented in Python and passed to IntegrateOrbits in the same way as a native C++ class. + + Non-inertial frames of reference + -------------------------------- + Each component of a multiple component simulation may have its own expansion + center. Orbit integration in the frame of reference of the expansion is + accomplished by defining a moving frame of reference using the setNonInertial() + call with either an array of n times and center positions (as an nx3 array) + or by initializing with an EXP orient file. + + We provide a member function, setNonInertialAccel(t), to estimate the frame + acceleration at a given time. This may be useful for user-defined acceleration + routines. This is automatically called default C++ evalcoefs() routine. 
)"; using namespace BasisClasses; @@ -414,6 +426,12 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE_PURE(void, BiorthBasis, set_coefs, coefs); } + std::vector getAccel(double x, double y, double z) override + { + PYBIND11_OVERRIDE_PURE(std::vector, BiorthBasis, + getAccel, x, y, z); + } + }; class PySpherical : public Spherical @@ -458,6 +476,10 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE(std::vector, Spherical, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override { + PYBIND11_OVERRIDE(std::vector, Spherical, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, Spherical, accumulate, x, y, z, mass); } @@ -506,6 +528,10 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE(std::vector, Cylindrical, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override { + PYBIND11_OVERRIDE(std::vector, Cylindrical, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, Cylindrical, accumulate, x, y, z, mass); } @@ -568,6 +594,11 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE(std::vector, FlatDisk, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override + { + PYBIND11_OVERRIDE(std::vector, FlatDisk, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, FlatDisk, accumulate, x, y, z, mass); @@ -633,6 +664,11 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE(std::vector, CBDisk, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override + { + PYBIND11_OVERRIDE(std::vector, CBDisk, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, CBDisk, accumulate, x, y, z, mass); @@ -701,6 +737,11 @@ void BasisFactoryClasses(py::module &m) 
PYBIND11_OVERRIDE(std::vector, Slab, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override + { + PYBIND11_OVERRIDE(std::vector, Slab, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, Slab, accumulate, x, y, z, mass); @@ -769,6 +810,11 @@ void BasisFactoryClasses(py::module &m) PYBIND11_OVERRIDE(std::vector, Cube, getFields, x, y, z); } + std::vector getAccel(double x, double y, double z) override + { + PYBIND11_OVERRIDE(std::vector, Cube, getAccel, x, y, z); + } + void accumulate(double x, double y, double z, double mass) override { PYBIND11_OVERRIDE(void, Cube, accumulate, x, y, z, mass); @@ -945,9 +991,104 @@ void BasisFactoryClasses(py::module &m) ------- list: str list of basis function labels - )" - ); + )" + ) + .def("setInertial", &BasisClasses::Basis::setInertial, + R"( + Reset to inertial coordinates + + Parameters + ---------- + None + + Returns + ------- + None + + See also + -------- + setNonInertial : set non-inertial data + setNonInertialAccel : set the non-inertial acceleration + )" + ) + .def("setNonInertial", + [](BasisClasses::Basis& A, + int N, const Eigen::VectorXd& times, const Eigen::MatrixXd& pos) { + A.setNonInertial(N, times, pos); + }, + R"( + Initialize for pseudo-force computation with a time series of positions + using (1) a time vector and (2) a center position matrix with rows of three + vectors + + Parameters + ---------- + N : int + number of previous positions to use for quadratic fit + times : list or numpy.ndarray + list of time points + pos : numpy.ndarray + an array with N rows and 3 columns of center positions + + Returns + ------- + None + + See also + -------- + setNonInertial : set non-inertial data from an Orient file + setNonInertialAccel : set the non-inertial acceration + )", + py::arg("N"), py::arg("times"), py::arg("pos") + ) + .def("setNonInertial", + [](BasisClasses::Basis& A, int N, const std::string orient) + { + 
A.setNonInertial(N, orient); + }, + R"( + Initialize for pseudo-force computation with a time series of positions + using a EXP orient file + + Parameters + ---------- + N : int + number of previous positions to use for quadratic fit + orient : str + name of the orient file + + Returns + ------- + None + + See also + -------- + setNonInertial : set non-inertial data from a time series of values + setNonInertialAccel : set the non-inertial acceration + )", + py::arg("N"), py::arg("orient") + ) + .def("setNonInertialAccel", &BasisClasses::Basis::setNonInertialAccel, + R"( + Set the pseudo acceleration for the non-inertial data at a given time + + Parameters + ---------- + time : float + evaluation time + + Returns + ------- + None + See also + -------- + setNonInertial : set non-inertial data from a time series of values + setNonInertial : set non-inertial data from an EXP orient file + )", + py::arg("time") + ); + py::class_, PyBiorthBasis, BasisClasses::Basis> (m, "BiorthBasis") .def(py::init(), @@ -1110,7 +1251,31 @@ void BasisFactoryClasses(py::module &m) See also -------- getFieldsCoefs : get fields for each coefficient set - __call__ : same getFields() but provides field labels in a tuple + __call__ : same as getFields() but provides field labels in a tuple + )", + py::arg("x"), py::arg("y"), py::arg("z")) + .def("getAccel", &BasisClasses::BiorthBasis::getAccel, + R"( + Return the acceleration for a given cartesian position + + Parameters + ---------- + x : float + x-axis position + y : float + y-axis position + z : float + z-axis position + + Returns + ------- + fields: numpy.ndarray + + See also + -------- + getFields : returns density, potential and acceleration + getFieldsCoefs : get fields for each coefficient set + __call__ : same as getFields() but provides field labels in a tuple )", py::arg("x"), py::arg("y"), py::arg("z")) .def("getFieldsCoefs", &BasisClasses::BiorthBasis::getFieldsCoefs, diff --git a/pyEXP/UtilWrappers.cc b/pyEXP/UtilWrappers.cc 
index 119005c84..04a80ae01 100644 --- a/pyEXP/UtilWrappers.cc +++ b/pyEXP/UtilWrappers.cc @@ -155,4 +155,22 @@ void UtilityClasses(py::module &m) { Report on the version and git commit. This is the same version information reported by the EXP N-body code.)"); + m.def("Version", + []() { + std::string version_str = VERSION; + std::istringstream iss(version_str); + std::string token; + int parts[3] = {0, 0, 0}; + int i = 0; + + while (std::getline(iss, token, '.') && i < 3) { + parts[i++] = std::stoi(token); + } + + return std::make_tuple(parts[0], parts[1], parts[2]); + }, + R"( + Return the version. + + This is the same version information reported by the EXP N-body code.)"); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3f712eafe..976663b3a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,17 +1,17 @@ -set(exp_SOURCES Basis.cc Bessel.cc Component.cc - Cube.cc Cylinder.cc ExternalCollection.cc CBDisk.cc - ExternalForce.cc Orient.cc PotAccel.cc ScatterMFP.cc - PeriodicBC.cc SphericalBasis.cc AxisymmetricBasis.cc Sphere.cc - TwoDCoefs.cc TwoCenter.cc EJcom.cc global.cc begin.cc ddplgndr.cc - Direct.cc Shells.cc NoForce.cc end.cc OutputContainer.cc OutPS.cc - OutPSQ.cc OutPSN.cc OutPSP.cc OutPSR.cc OutCHKPT.cc OutCHKPTQ.cc - Output.cc externalShock.cc CylEXP.cc generateRelaxation.cc - HaloBulge.cc incpos.cc incvel.cc ComponentContainer.cc OutAscii.cc - OutMulti.cc OutRelaxation.cc OrbTrace.cc OutDiag.cc OutLog.cc - OutVel.cc OutCoef.cc multistep.cc parse.cc SlabSL.cc step.cc - tidalField.cc ultra.cc ultrasphere.cc MPL.cc OutFrac.cc OutCalbr.cc - ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc - CenterFile.cc PolarBasis.cc FlatDisk.cc signals.cc) +set(exp_SOURCES Basis.cc Bessel.cc Component.cc Cube.cc Cylinder.cc + ExternalCollection.cc CBDisk.cc ExternalForce.cc Orient.cc + PotAccel.cc ScatterMFP.cc PeriodicBC.cc SphericalBasis.cc + AxisymmetricBasis.cc Sphere.cc TwoDCoefs.cc TwoCenter.cc EJcom.cc + global.cc begin.cc ddplgndr.cc 
Direct.cc Shells.cc NoForce.cc end.cc + OutputContainer.cc OutPS.cc OutPSQ.cc OutPSN.cc OutPSP.cc OutPSR.cc + OutCHKPT.cc OutCHKPTQ.cc Output.cc externalShock.cc CylEXP.cc + generateRelaxation.cc HaloBulge.cc incpos.cc incvel.cc + ComponentContainer.cc OutAscii.cc OutHDF5.cc OutMulti.cc + OutRelaxation.cc OrbTrace.cc OutDiag.cc OutLog.cc OutVel.cc + OutCoef.cc multistep.cc parse.cc SlabSL.cc step.cc tidalField.cc + ultra.cc ultrasphere.cc MPL.cc OutFrac.cc OutCalbr.cc + ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc CenterFile.cc + PolarBasis.cc FlatDisk.cc signals.cc) if (ENABLE_CUDA) list(APPEND exp_SOURCES cudaPolarBasis.cu cudaSphericalBasis.cu diff --git a/src/Component.H b/src/Component.H index 82eae2ce2..8be95eaa0 100644 --- a/src/Component.H +++ b/src/Component.H @@ -6,11 +6,19 @@ #include +#include + +#include +#include +#include +#include + #include #include #include #include #include +#include #include #include #include @@ -349,6 +357,14 @@ protected: static const std::set valid_keys_force; //@} + + //@{ + //! HDF5 compression parameters + int H5compress = 0; + bool H5shuffle = true; + int H5chunk = 4096; + //@} + public: //! Describe the phase space coordinates @@ -533,6 +549,9 @@ public: //! Initialize from file Component(YAML::Node&, istream *in, bool SPL); + //! Initialize from HDF5 + Component(YAML::Node&, PR::PSPhdf5& reader); + //! Destructor ~Component(); @@ -551,6 +570,18 @@ public: //! Write binary component phase-space structure void write_binary(ostream *out, bool real4 = false); + //@{ + //! Write HDF5 phase-space structure for real or float + + //! Gadget style + template + void write_HDF5(HighFive::Group& group, bool masses, bool IDs); + + //! PSP style + template + void write_H5(H5::Group& group); + //@} + //! Write header for per-node writes void write_binary_header(ostream* out, bool real4, std::string prefix, int nth=1); @@ -970,6 +1001,16 @@ public: //! 
Compute level from minimum requested time step from last master step inline bool DTreset() { return dtreset; } + //! Set H5 compression parameters; in principle, compression can be + //! set per component, although it is set to be the same for all + //! components currently. + void setH5(int compress, bool shuffle, int chunk) + { + H5compress = compress; + H5shuffle = shuffle; + H5chunk = chunk; + } + #if HAVE_LIBCUDA==1 //@{ //! CUDA utilities for handling host <===> device exchange diff --git a/src/Component.cc b/src/Component.cc index b52f30c30..a7a866ee6 100644 --- a/src/Component.cc +++ b/src/Component.cc @@ -86,7 +86,10 @@ const std::set Component::valid_keys_parm = "ctr_name", "noswitch", "freezeL", - "dtreset" + "dtreset", + "H5compress", + "H5shuffle", + "H5chunk", }; const std::set Component::valid_keys_force = @@ -776,6 +779,209 @@ Component::Component(YAML::Node& CONF, istream *in, bool SPL) : conf(CONF) pbuf.resize(PFbufsz); } +Component::Component(YAML::Node& CONF, PR::PSPhdf5& reader) : conf(CONF) +{ + // Make a copy + conf = CONF; + + try { + name = conf["name"].as(); + } + catch (YAML::Exception & error) { + if (myid==0) std::cout << __FILE__ << ": " << __LINE__ << std::endl + << "Error parsing component 'name': " + << error.what() << std::endl + << std::string(60, '-') << std::endl + << "Config node" << std::endl + << std::string(60, '-') << std::endl + << conf << std::endl + << std::string(60, '-') << std::endl; + + throw std::runtime_error("Component: error parsing component "); + } + + try { + cconf = conf["parameters"]; + } + catch (YAML::Exception & error) { + if (myid==0) std::cout << "Error parsing 'parameters' for Component <" + << name << ">: " + << error.what() << std::endl + << std::string(60, '-') << std::endl + << "Config node" << std::endl + << std::string(60, '-') << std::endl + << conf + << std::string(60, '-') << std::endl; + + throw std::runtime_error("Component: error parsing "); + } + + pfile = conf["bodyfile"].as(); + + 
YAML::Node cforce; + try { + cforce = conf["force"]; + } + catch (YAML::Exception & error) { + if (myid==0) std::cout << "Error parsing 'force' for Component <" + << name << ">: " + << error.what() << std::endl + << std::string(60, '-') << std::endl + << "Config node" << std::endl + << std::string(60, '-') << std::endl + << conf << std::endl + << std::string(60, '-') << std::endl; + + throw std::runtime_error("Component: error parsing "); + } + + id = cforce["id"].as(); + + try { + fconf = cforce["parameters"]; + } + catch (YAML::Exception & error) { + if (myid==0) std::cout << "Error parsing force 'parameters' for Component <" + << name << ">: " + << error.what() << std::endl + << std::string(60, '-') << std::endl + << "Config node" << std::endl + << std::string(60, '-') << std::endl + << cforce << std::endl + << std::string(60, '-') << std::endl; + + throw std::runtime_error("Component: error parsing force "); + } + + // Defaults + // + EJ = 0; + nEJkeep = 100; + nEJwant = 500; + nEJaccel = 0; + EJkinE = true; + EJext = false; + EJdiag = false; + EJdryrun = false; + EJx0 = 0.0; + EJy0 = 0.0; + EJz0 = 0.0; + EJu0 = 0.0; + EJv0 = 0.0; + EJw0 = 0.0; + EJdT = 0.0; + EJlinear = false; + EJdamp = 1.0; + + binary = true; + + adiabatic = false; + ton = -1.0e20; + toff = 1.0e20; + twid = 0.1; + + rtrunc = 1.0e20; + rcom = 1.0e20; + consp = false; + tidal = -1; + + com_system = false; + com_log = false; + com_restart = 0; + c0 = NULL; // Component for centering (null by + // default) +#if HAVE_LIBCUDA==1 + bunchSize = 100000; +#endif + + timers = false; + + force = 0; // Null out pointers + orient = 0; + + com = 0; + cov = 0; + coa = 0; + center = 0; + angmom = 0; + ps = 0; + + com0 = 0; + cov0 = 0; + acc0 = 0; + + indexing = true; + aindex = false; + umagic = true; + + keyPos = -1; + nlevel = -1; + top_seq = 0; + + pBufSiz = 100000; + blocking = false; + buffered = true; // Use buffered writes for POSIX binary + noswitch = false; // Allow multistep switching at master 
step only + dtreset = true; // Select level from criteria over last step + freezeLev = false; // Only compute new levels on first step + + configure(); + + find_ctr_component(); + + initialize_cuda(); + + particles.clear(); + + // Read bodies + // + double rmax1=0.0; + niattrib=-1; + for (auto p=reader.firstParticle(); p!=0; p=reader.nextParticle()) { + // Assign attribute sizes + if (niattrib<0) { + niattrib = p->iattrib.size(); + ndattrib = p->dattrib.size(); + } + + // Make the particle + particles[p->indx] = std::make_shared(*p); + + // Get limits + double r2 = 0.0; + for (int k=0; k<3; k++) r2 += p->pos[k]*p->pos[k]; + rmax1 = std::max(r2, rmax1); + top_seq = std::max(top_seq, p->indx); + } + + // Default: set to max radius + rmax = sqrt(fabs(rmax1)); + MPI_Bcast(&rmax, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + // Send top_seq to all nodes + MPI_Bcast(&top_seq, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + + initialize(); + + // Initialize the particle ferry + // instance with dynamic attribute + // sizes + if (not pf) pf = ParticleFerryPtr(new ParticleFerry(niattrib, ndattrib)); + + mdt_ctr = std::vector< std::vector > (multistep+1); + for (unsigned n=0; n<=multistep; n++) mdt_ctr[n] = std::vector(mdtDim, 0); + + angmom_lev = std::vector(3*(multistep+1), 0); + com_lev = std::vector(3*(multistep+1), 0); + cov_lev = std::vector(3*(multistep+1), 0); + coa_lev = std::vector(3*(multistep+1), 0); + com_mas = std::vector( multistep+1, 0); + + reset_level_lists(); + + pbuf.resize(PFbufsz); +} + void Component::configure(void) { // Check for unmatched keys @@ -2247,6 +2453,232 @@ void Component::write_binary(ostream* out, bool real4) } +//! Write HDF5 phase-space structure for this nodes particles only. +//! Use template to specialize to either float or double precision. 
+template +void Component::write_HDF5(HighFive::Group& group, bool masses, bool IDs) +{ + Eigen::Matrix pos(nbodies, 3); + Eigen::Matrix vel(nbodies, 3); + + std::vector mas, pot, potext; + std::vector ids; + + Eigen::Matrix iattrib; + Eigen::Matrix dattrib; + + if (masses) mas.resize(nbodies); + if (IDs) ids.resize(nbodies); + if (niattrib) iattrib.resize(nbodies, niattrib); + if (ndattrib) dattrib.resize(nbodies, niattrib); + + pot. resize(nbodies); + potext.resize(nbodies); + + unsigned int ctr = 0; + for (auto it=particles.begin(); it!=particles.end(); it++) { + + // Get the particle pointer + auto &P = it->second; + + // Pack HDF5 data + if (masses) mas[ctr] = P->mass; + if (IDs) ids[ctr] = P->indx; + for (int j=0; j<3; j++) pos(ctr, j) = P->pos[j]; + for (int j=0; j<3; j++) vel(ctr, j) = P->vel[j]; + pot [ctr] = P->pot; + potext[ctr] = P->potext; + for (int j=0; jiattrib[j]; + for (int j=0; jdattrib[j]; + + // Increment array position + ctr++; + } + + // Set properties, if requested + auto dcpl1 = HighFive::DataSetCreateProps{}; + auto dcpl3 = HighFive::DataSetCreateProps{}; + auto dcplI = HighFive::DataSetCreateProps{}; + auto dcplD = HighFive::DataSetCreateProps{}; + + if (H5compress or H5chunk) { + int chunk = H5chunk; + + // Sanity + if (H5chunk >= nbodies) { + chunk = nbodies/8; + } + + dcpl1.add(HighFive::Chunking(chunk)); + if (H5shuffle) dcpl1.add(HighFive::Shuffle()); + dcpl1.add(HighFive::Deflate(H5compress)); + + dcpl3.add(HighFive::Chunking(chunk, 3)); + if (H5shuffle) dcpl3.add(HighFive::Shuffle()); + dcpl3.add(HighFive::Deflate(H5compress)); + + if (niattrib) { + dcplI.add(HighFive::Chunking(chunk, niattrib)); + if (H5shuffle) dcplI.add(HighFive::Shuffle()); + dcplI.add(HighFive::Deflate(H5compress)); + } + if (ndattrib) { + dcplD.add(HighFive::Chunking(chunk, ndattrib)); + if (H5shuffle) dcplD.add(HighFive::Shuffle()); + dcplD.add(HighFive::Deflate(H5compress)); + } + } + + // Add all datasets + if (masses) { + HighFive::DataSet ds = 
group.createDataSet("Masses", mas, dcpl1); + } + + if (IDs) { + HighFive::DataSet ds = group.createDataSet("ParticleIDs", ids, dcpl1); + } + + HighFive::DataSet dsPos = group.createDataSet("Coordinates", pos, dcpl3); + HighFive::DataSet dsVel = group.createDataSet("Velocities", vel, dcpl3); + + HighFive::DataSet dsPot = group.createDataSet("Potential", pot, dcpl1); + HighFive::DataSet dsExt = group.createDataSet("PotentialExt", potext, dcpl1); + + if (niattrib>0) + HighFive::DataSet dsInt = group.createDataSet("IntAttributes", iattrib, dcplI); + + if (ndattrib>0) + HighFive::DataSet dsReal = group.createDataSet("RealAttributes", dattrib, dcplD); +} + +// Explicit instantiations for float and double +template +void Component::write_HDF5 +(HighFive::Group& group, bool masses, bool IDs); + +template +void Component::write_HDF5 +(HighFive::Group& group, bool masses, bool IDs); + +// Helper structure for reading and writing the HDF5 compound data +// type. Used by Component and ParticleReader. 
+template +struct H5Particle +{ + unsigned long id; + T mass; + T pos[3]; + T vel[3]; + T pot; + T potext; + hvl_t iattrib; // HDF5 variable length + hvl_t dattrib; // HDF5 variable length +}; + +// Write HDF5 phase-space structure for this nodes particles only in +// PSP style +template +void Component::write_H5(H5::Group& group) +{ + try { + + // Define the compound datatype + H5::CompType compound_type(sizeof(H5Particle)); + + H5::DataType type; + if constexpr (std::is_same_v) + type = H5::PredType::NATIVE_DOUBLE; + else if constexpr (std::is_same_v) + type = H5::PredType::NATIVE_FLOAT; + else assert(0); + + // Add members to the type + compound_type.insertMember("id", HOFFSET(H5Particle, id), H5::PredType::NATIVE_INT); + compound_type.insertMember("mass", HOFFSET(H5Particle, mass), type); + + hsize_t dims3[1] = {3}; + H5::ArrayType array3_type(type, 1, dims3); + + compound_type.insertMember("pos", HOFFSET(H5Particle, pos ), array3_type); + compound_type.insertMember("vel", HOFFSET(H5Particle, vel ), array3_type); + compound_type.insertMember("pot", HOFFSET(H5Particle, pot ), type); + compound_type.insertMember("potext", HOFFSET(H5Particle, potext), type); + compound_type.insertMember("iattrib", HOFFSET(H5Particle, iattrib), H5::VarLenType(H5::PredType::NATIVE_INT)); + compound_type.insertMember("dattrib", HOFFSET(H5Particle, dattrib), H5::VarLenType(type)); + + + // Create the data space + std::vector> h5_particles(particles.size()); + + size_t n = 0; + for (auto it=particles.begin(); it!=particles.end(); it++, n++) { + + // Get the particle pointer + auto &P = it->second; + + // Load the data + h5_particles[n].id = P->indx; + h5_particles[n].mass = P->mass; + for (int k=0; k<3; k++) { + h5_particles[n].pos[k] = P->pos[k]; + h5_particles[n].vel[k] = P->vel[k]; + } + h5_particles[n].pot = P->pot; + h5_particles[n].potext = P->potext; + h5_particles[n].iattrib.len = P->iattrib.size(); + h5_particles[n].dattrib.len = P->dattrib.size(); + h5_particles[n].iattrib.p 
= P->iattrib.data(); + h5_particles[n].dattrib.p = P->dattrib.data(); + } + + // Set properties, if requested for chunking and compression + H5::DSetCreatPropList dcpl; + + // This could be generalized by registering a user filter, like + // blosc. Right now, we're using the default (which is gzip) + if (H5compress or H5chunk) { + // Set chunking + if (H5chunk) { + // Sanity + int chunk = H5chunk; + if (H5chunk >= nbodies) { + chunk = nbodies/8; + } + hsize_t chunk_dims[1] = {static_cast(chunk)}; + dcpl.setChunk(1, chunk_dims); + } + + // Set compression level + if (H5compress) dcpl.setDeflate(H5compress); + + // Enable shuffle filter + if (H5shuffle) dcpl.setShuffle(); + } + + // Create dataspace + hsize_t dims[1] = {h5_particles.size()}; + H5::DataSpace dataspace(1, dims); + + // Create dataset + H5::DataSet dataset = group.createDataSet("particles", compound_type, dataspace, dcpl); + + // Write data to the dataset + dataset.write(h5_particles.data(), compound_type); + + } catch (H5::Exception &error) { + throw std::runtime_error("Component::writeH5 error: " + error.getDetailMsg()); + } + +} + +// Explicit instantiations for float and double +template +void Component::write_H5(H5::Group& group); + +template +void Component::write_H5(H5::Group& group); + + void Component::write_binary_header(ostream* out, bool real4, const std::string prefix, int nth) { ComponentHeader header; diff --git a/src/ComponentContainer.cc b/src/ComponentContainer.cc index f399a59d0..8f95a49d3 100644 --- a/src/ComponentContainer.cc +++ b/src/ComponentContainer.cc @@ -4,12 +4,14 @@ #include +#include #include #include #include #include #include +#include #include #ifdef USE_GPTL @@ -46,99 +48,223 @@ void ComponentContainer::initialize(void) read_rates(); // Read initial processor rates - bool SPL = false; // Indicate whether file has SPL prefix - unsigned char ir = 0; - unsigned char is = 0; - // Look for a restart file + bool SPL = false; // Indicates whether file has the SPL prefix + 
bool HDF5 = false; // Indicates whether file is an HDF5 file + + unsigned short ir = 0; // Number of restart files + unsigned short is = 0; // Number of SPL files + unsigned short ih = 0; // Number of HDF5 files + + // Look for a restart file + // if (myid==0) { - string resfile = outdir + infile; - ifstream in(resfile.c_str()); - if (in) { - if (ignore_info) - cerr << "---- ComponentContainer successfully opened <" - << resfile << ">, assuming a new run using a previous phase space as initial conditions" << endl; - else - cerr << "---- ComponentContainer successfully opened <" - << resfile << ">, assuming a restart" << endl; - ir = 1; + std::string resfile = outdir + infile; + + std::filesystem::path dir_path = resfile; + // If restart path is a directory, + // assume HDF5 + if (std::filesystem::is_directory(dir_path)) { + HDF5 = true; + try { + for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (std::filesystem::is_regular_file(entry)) { + ir++; + if (H5::H5File::isHdf5(entry.path().string())) ih++; + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "Component::initialize error: " << e.what() << std::endl; + } + } else { - cerr << "---- ComponentContainer could not open <" - << resfile << ">, assuming a new run" << endl; - ir = 0; + std::ifstream in(resfile.c_str()); + if (in) { + if (ignore_info) + std::cerr << "---- ComponentContainer successfully opened <" + << resfile << ">, assuming a new run using a previous phase space as initial conditions" << std::endl; + else + std::cerr << "---- ComponentContainer successfully opened <" + << resfile << ">, assuming a restart" << std::endl; + ir = 1; + } else { + std::cerr << "---- ComponentContainer could not open <" + << resfile << ">, assuming a new run" << std::endl; + ir = 0; + } + if (infile.find("SPL") != std::string::npos) is = 1; } - if (infile.find("SPL") != std::string::npos) is = 1; } - MPI_Bcast(&ir, 1, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); - 
MPI_Bcast(&is, 1, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); + // Share file counts and HDF5 detection + // + MPI_Bcast(&ir, 1, MPI_UNSIGNED_SHORT, 0, MPI_COMM_WORLD); + MPI_Bcast(&is, 1, MPI_UNSIGNED_SHORT, 0, MPI_COMM_WORLD); + MPI_Bcast(&ih, 1, MPI_UNSIGNED_SHORT, 0, MPI_COMM_WORLD); + MPI_Bcast(&HDF5, 1, MPI_CXX_BOOL, 0, MPI_COMM_WORLD); + // Set restart flags. 'restart' is an EXP global. 'SPL' and 'HDF5' + // are local to this member function. + // restart = ir ? true : false; SPL = is ? true : false; + // Begin phase space recovery + // if (restart) { - struct MasterHeader master; - ifstream in; + if (HDF5) { - // Open file - if (myid==0) { + // Sanity check: must have at least one HDF5 file in the + // directory + if (ih<1) + throw std::runtime_error("ComponentContainer::initialize HDF5 restart directory found but no HDF5 files found"); - string resfile = outdir + infile; - in.open(resfile); - if (in.fail()) { - throw FileOpenError(resfile, __FILE__, __LINE__); + auto hasEnding = [](const std::string& fullStr, + const std::string& ending) -> bool + { + if (fullStr.length() >= ending.length()) { + return fullStr.compare(fullStr.length() - ending.length(), + ending.length(), ending) == 0; + } else { + return false; + } + }; + + std::filesystem::path dir_path = outdir + infile; + std::vector files; + try { + for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { + auto file = entry.path().string(); + if (H5::H5File::isHdf5(file)) { + // Ignore checkpoint backup files + if (not hasEnding(file, ".bak")) files.push_back(file); + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "ComponentContainer::initialize HDF5 file listing error: " << e.what() << std::endl; } - in.read((char *)&master, sizeof(MasterHeader)); - if (in.fail()) { - std::ostringstream sout; - sout << "ComponentContainer::initialize: " - << "could not read master header from <" - << resfile << ">"; - throw GenericError(sout.str(), __FILE__, __LINE__); + // 
Need to sort these files so that ".1" is first in the list so + // we can elide the metadata from the other HDF5 files + if (files.size() > 1) { + + // Function to extract the numerical substring and convert to + // an integer + auto getIndex = [](const std::string& str) -> int + { + auto pos = str.find_last_of("."); // File ends in .int + if (pos == std::string::npos) return 0; // No index + else return std::stoi(str.substr(pos+1)); // Extract index + }; + + // Custom comparison function + auto compareStringsByIndex = + [&getIndex](const std::string& a, const std::string& b) -> bool + { + return getIndex(a) < getIndex(b); + }; + + // Sort the files by index + std::sort(files.begin(), files.end(), compareStringsByIndex); } - if (ignore_info) { - cout << "Found: " - << " Ntot=" << master.ntot - << " Ncomp=" << master.ncomp << endl; + PR::PSPhdf5 reader(files, true); - } else { - cout << "Recovering from: " - << " Tnow=" << master.time - << " Ntot=" << master.ntot - << " Ncomp=" << master.ncomp << endl; + auto types = reader.GetTypes(); + ncomp = types.size(); + + if (not ignore_info) tnow = reader.CurrentTime(); - tnow = master.time; + if (myid==0) { + if (ignore_info) { + cout << "---- ComponentContainer found: " + << " Ntot=" << reader.CurrentNumber() + << " Ncomp=" << types.size() << std::endl; + + } else { + cout << "---- ComponentContainer recovering from: " + << " Tnow=" << tnow + << " Ntot=" << reader.CurrentNumber() + << " Ncomp=" << types.size() << std::endl; + } + } + + YAML::Node comp = parse["Components"]; + + // Will use ParticleReader to load particles without the usual + // ParticleFerry + if (comp.IsSequence()) { + for (int i=0; i"; + throw GenericError(sout.str(), __FILE__, __LINE__); + } + + if (ignore_info) { + cout << "---- ComponentContainer found: " + << " Ntot=" << master.ntot + << " Ncomp=" << master.ncomp << std::endl; + + } else { + cout << "---- ComponentContainer recovering from: " + << " Tnow=" << master.time + << " Ntot=" << master.ntot 
+ << " Ncomp=" << master.ncomp << std::endl; - MPI_Bcast(&tnow, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + tnow = master.time; + } + + ntot = master.ntot; + ncomp = master.ncomp; + } - MPI_Bcast(&ntot, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&tnow, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + MPI_Bcast(&ntot, 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Bcast(&ncomp, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&ncomp, 1, MPI_INT, 0, MPI_COMM_WORLD); - YAML::Node comp = parse["Components"]; + YAML::Node comp = parse["Components"]; - if (comp.IsSequence()) { - for (int i=0; i: " << e.what() << std::endl; + try { + in.close(); + } + catch (const ifstream::failure& e) { + std::cout << "ComponentContainer: exception closing file <" + << outdir + infile << ">: " << e.what() << std::endl; + } } } else { diff --git a/src/Cylinder.H b/src/Cylinder.H index 0faf7ddaa..071420faa 100644 --- a/src/Cylinder.H +++ b/src/Cylinder.H @@ -35,6 +35,8 @@ class MixtureBasis; @param hcyl is the scale height + @param sech2 if true, use hcyl as sech^2(z/(2*hcyl)) + @param lmax is the maximum spherical harmonic index for EOF construction @param mmax is the maximum azimuthal order for the resulting basis @@ -110,7 +112,7 @@ class Cylinder : public Basis { private: - bool precond, EVEN_M, subsamp; + bool precond, EVEN_M, subsamp, sech2 = false; int rnum, pnum, tnum; double ashift; unsigned int vflag; diff --git a/src/Cylinder.cc b/src/Cylinder.cc index af8103e0d..54cca39b3 100644 --- a/src/Cylinder.cc +++ b/src/Cylinder.cc @@ -29,6 +29,7 @@ Cylinder::valid_keys = { "rcylmax", "acyl", "hcyl", + "sech2", "hexp", "snr", "evcut", @@ -271,9 +272,10 @@ Cylinder::Cylinder(Component* c0, const YAML::Node& conf, MixtureBasis *m) : auto DiskDens = [&](double R, double z, double phi) { if (dfunc) return (*dfunc)(R, z, phi); - double f = exp(-fabs(z)/hcyl); // Overflow prevention + double h = sech2 ? 
0.5*hcyl : hcyl; + double f = exp(-fabs(z)/h); // Overflow prevention double s = 2.0*f/(1.0 + f*f); // in sech computation - return exp(-R/acyl)*s*s/(4.0*M_PI*acyl*acyl*hcyl); + return exp(-R/acyl)*s*s/(4.0*M_PI*acyl*acyl*h); }; // The conditioning function for the EOF with an optional shift @@ -339,6 +341,7 @@ Cylinder::Cylinder(Component* c0, const YAML::Node& conf, MixtureBasis *m) : << std::endl << sep << "npca0=" << npca0 << std::endl << sep << "pcadiag=" << pcadiag << std::endl << sep << "cachename=" << cachename + << std::endl << sep << "sech2=" << std::boolalpha << sech2 << std::endl << sep << "selfgrav=" << std::boolalpha << self_consistent << std::endl << sep << "logarithmic=" << logarithmic << std::endl << sep << "vflag=" << vflag @@ -367,6 +370,7 @@ Cylinder::Cylinder(Component* c0, const YAML::Node& conf, MixtureBasis *m) : << std::endl << sep << "npca0=" << npca0 << std::endl << sep << "pcadiag=" << pcadiag << std::endl << sep << "cachename=" << cachename + << std::endl << sep << "sech2=" << std::boolalpha << sech2 << std::endl << sep << "selfgrav=" << std::boolalpha << self_consistent << std::endl << sep << "logarithmic=" << logarithmic << std::endl << sep << "vflag=" << vflag @@ -414,6 +418,7 @@ void Cylinder::initialize() if (conf["acyl" ]) acyl = conf["acyl" ].as(); if (conf["hcyl" ]) hcyl = conf["hcyl" ].as(); + if (conf["sech2" ]) sech2 = conf["sech2" ].as(); if (conf["hexp" ]) hexp = conf["hexp" ].as(); if (conf["snr" ]) snr = conf["snr" ].as(); if (conf["evcut" ]) rem = conf["evcut" ].as(); @@ -453,6 +458,14 @@ void Cylinder::initialize() if (conf["cmapz" ]) cmapZ = conf["cmapz" ].as(); if (conf["vflag" ]) vflag = conf["vflag" ].as(); + // Deprecation warning + if (not sech2 and not conf["pyname"]) { + if (myid==0) + std::cout << "---- Cylinder uses sech^2(z/h) rather than the more common sech^2(z/(2h))" << std::endl + << "---- Use the 'sech2: true' in your YAML config to use sech^2(z/(2h))" << std::endl + << "---- Cylinder will assume 
sech^2(z/(2h)) by default in v 7.9.0 and later" << std::endl; + } + // Deprecation warning if (conf["expcond"]) { if (myid==0) diff --git a/src/Orient.cc b/src/Orient.cc index 99597dc24..afbfb2ffa 100644 --- a/src/Orient.cc +++ b/src/Orient.cc @@ -177,8 +177,13 @@ Orient::Orient(int n, int nwant, int naccel, unsigned Oflg, unsigned Cflg, bool allRead = true; for (int i=0; i<3; i++) { - if (line.eof()) allRead = false; - for (int k; k<3; k++) line >> pseudo(k); + if (line.eof()) { + allRead = false; + break; + } + else { + for (int k=0; k<3; k++) line >> pseudo(k); + } } if (allRead) { if (accel) accel->add(time, pseudo, axis1); diff --git a/src/OutHDF5.H b/src/OutHDF5.H new file mode 100644 index 000000000..c48f1337d --- /dev/null +++ b/src/OutHDF5.H @@ -0,0 +1,73 @@ +#ifndef _OutHDF5_H +#define _OutHDF5_H + +/** Write phase-space dumps at regular intervals from each node in + component pieces. This writer is indended to be Gadget-4 HDF5 + compliant. + + @param filename is the name of the output file + @param nint is the number of steps between dumps + @param nbeg is suffix of the first phase space %dump + @param timer set to true turns on wall-clock timer for PS output + @param gadget set to true for Gadget-4-type HDF5 + @param noids set to true turns off particle id writing + @param checkpt sets checkpoint mode: write a real8 snapshot with a unique filename + @param preserve prevents overwrite of snapshot files + @param expconfig set to true stashes the YAML configuration file in the HDF5 file + @param allmeta set to true writes all meta data to each HDF5 file + @param directory set to true stores all HDF5 files in a named directory +*/ +class OutHDF5 : public Output +{ + +private: + + std::string filename; + bool real4=true, real8=false, ids=true, gadget4=false, chkpt=false; + bool overwrite=false, expconfig=true, allmeta=false, directory=true, timer; + unsigned write_flags; + int nbeg; + void initialize(void); + std::vector masses; + std::vector multim; + 
std::string hdf_dir, chkpt_dir; + + //! Valid keys for YAML configurations + static const std::set valid_keys; + + //! Check for single particle mass on the first invocation + void checkParticleMasses(); + + //! Gadget-4 format + void RunGadget4(const std::string& path); + + //! PSP format + void RunPSP(const std::string& path); + + +public: + + //! Constructor + OutHDF5(const YAML::Node & conf); + + //! Provided by derived class to generate some output + /*! + \param nstep is the current time step used to decide whether or not + to %dump + \param mstep is the current multistep level to decide whether or not to dump multisteps + \param last should be true on final step to force phase space %dump + indepentently of whether or not the frequency criterion is met + \param timer set to true turns on wall-clock timer for PS output + \param noids set to true turns off particle id writing + \param gadget set to true for Gadget-4-type HDF5 + \param checkpt set to true for checkpoint mode + \param preserve set to true prevents overwrite of snapshot files + \param expconfig set to true stashes the YAML configuration file in the HDF5 file + \param allmeta set to true writes all meta data to each HDF5 file + \param directory set to true stores all HDF5 files in a named directory + */ + void Run(int nstep, int mstep, bool last); + +}; + +#endif diff --git a/src/OutHDF5.cc b/src/OutHDF5.cc new file mode 100644 index 000000000..b744d9c43 --- /dev/null +++ b/src/OutHDF5.cc @@ -0,0 +1,818 @@ +#include +#include +#include +#include +#include +#include +#include + +// For unlink +#include + +// H5 API +#include + +// HighFive API for HDF5 +#include +#include + +#include "expand.H" +#include + +#include + +const std::set +OutHDF5::valid_keys = { + "filename", + "nint", + "nintsub", + "nbeg", + "noids", + "real4", + "real8", + "timer", + "gadget", + "checkpt", + "preserve", + "H5compress", + "H5chunk", + "expconfig", + "allmeta" +}; + +OutHDF5::OutHDF5(const YAML::Node& conf) : 
Output(conf) +{ + initialize(); +} + +void OutHDF5::initialize() +{ + // Remove matched keys + // + for (auto v : valid_keys) current_keys.erase(v); + + // Assign values from YAML + // + try { + // Get file name + if (Output::conf["filename"]) + filename = Output::conf["filename"].as(); + else{ + filename.erase(); + filename = "H5_" + runtag; + } + + if (Output::conf["nint"]) + nint = Output::conf["nint"].as(); + else + nint = 100; + + if (Output::conf["nintsub"]) { +#ifdef ALLOW_NINTSUB + nintsub = Output::conf["nintsub"].as(); + if (nintsub <= 0) nintsub = 1; +#else + nintsub_warning("OutHDF5"); + nintsub = std::numeric_limits::max(); +#endif + } else + nintsub = std::numeric_limits::max(); + + if (Output::conf["nbeg"]) + nbeg = Output::conf["nbeg"].as(); + else + nbeg = 0; + + if (Output::conf["real4"]) { + real4 = Output::conf["real4"].as(); + real8 = not real4; + } else { + real4 = true; + real8 = false; + } + + if (Output::conf["real8"]) { + real8 = Output::conf["real8"].as(); + real4 = not real8; + } + + // This will override user setting of real4... + if (Output::conf["checkpt"]) { + chkpt = true; + real8 = true; + real4 = false; + } + + write_flags = H5F_ACC_TRUNC; + if (Output::conf["preserve"]) { + bool preserve = Output::conf["preserve"].as(); + write_flags = preserve ? H5F_ACC_EXCL : H5F_ACC_TRUNC; + } + + if (Output::conf["noids"]) { + ids = not Output::conf["noids"].as(); + } + + if (Output::conf["timer"]) + timer = Output::conf["timer"].as(); + else + timer = false; + + if (Output::conf["gadget"]) + gadget4 = Output::conf["gadget"].as(); + else + gadget4 = false; + + if (Output::conf["expconfig"]) + expconfig = Output::conf["expconfig"].as(); + + if (Output::conf["allmeta"]) + allmeta = Output::conf["allmeta"].as(); + + if (Output::conf["directory"]) + directory = Output::conf["directory"].as(); + + // Default HDF5 compression is no compression. By default, + // shuffle is on unless turned off manually. 
+ // + int H5compress = 0, H5chunk = 0; + bool H5shuffle= true; + + if (Output::conf["H5compress"]) + H5compress = Output::conf["H5compress"].as(); + + if (Output::conf["H5shuffle"]) + H5shuffle = Output::conf["H5shuffle"].as(); + + if (Output::conf["H5chunk"]) + H5chunk = Output::conf["H5chunk"].as(); + + // Register compression parameters win the Component instance + if (H5chunk>0) + for (auto c : comp->components) c->setH5(H5compress, H5shuffle, H5chunk); + } + catch (YAML::Exception & error) { + if (myid==0) std::cout << "Error parsing parameters in OutHDF5: " + << error.what() << std::endl + << std::string(60, '-') << std::endl + << "Config node" << std::endl + << std::string(60, '-') << std::endl + << conf << std::endl + << std::string(60, '-') << std::endl; + throw std::runtime_error("OutHDF5::initialize: error parsing YAML"); + } + + + // Determine last file + // + if (restart && nbeg==0) { + + // Only root node looks for paths + // + if (myid==0) { + + // First check for a directory + // + if (directory) { + std::ostringstream dname; + dname << outdir + << filename << "_" << setw(5) << setfill('0') << nbeg; + + std::filesystem::path dir_path = dname.str(); + + // Sanity check + if (not std::filesystem::is_directory(dir_path)) { + throw std::runtime_error("Component::initialize error: you specified directory organization of output but the directory " + dir_path.string() + " does not exist"); + } + } + else { + std::ostringstream fname; + fname << outdir + << filename << "_" << setw(5) << setfill('0') << nbeg + << ".1"; + + std::filesystem::path file_path = fname.str(); + + if (not std::filesystem::is_regular_file(file_path)) { + throw std::runtime_error("Component::initialize error: you specified file organization of output but the file " + fname.str() + " does not exist"); + } + } + + // Find starting index + // + for (; nbeg<100000; nbeg++) { + // Path name + // + std::ostringstream fname; + fname << outdir + << filename << "_" << setw(5) << setfill('0') 
<< nbeg; + + if (not directory) fname << ".1"; + + std::filesystem::path path = fname.str(); + + // See if we can open the directory or file + // + if (directory) { + if (not std::filesystem::is_directory(path)) break; + } else + if (not std::filesystem::is_regular_file(path)) break; + } + + std::cout << "---- OutHDF5: found last file <" << nbeg << ">" << std::endl; + } + + // Communicate starting file index to all nodes + // + MPI_Bcast(&nbeg, 1, MPI_INT, 0, MPI_COMM_WORLD); + } + + return; +} + + +void OutHDF5::Run(int n, int mstep, bool last) +{ + if (!dump_signal and !last) { + if (n % nint) return; + if (restart && n==0) return; + if (multistep>1 && mstep % nintsub !=0) return; + } + + std::chrono::high_resolution_clock::time_point beg, end; + if (timer) beg = std::chrono::high_resolution_clock::now(); + + std::ofstream out; + std::string h5_dir; + std::ostringstream fname; + + // Checkpoint mode + if (chkpt) { + + // On first call, create checkpoint directory + if (directory and chkpt_dir.size()==0) { + std::ostringstream dname; + dname << outdir << "checkpoint_" << runtag; + + std::filesystem::path dir_path = dname.str(); + + chkpt_dir = dir_path.string() + "/"; + + bool okay = true; + if (myid==0) { + if (not std::filesystem::is_directory(dir_path)) { + if (std::filesystem::create_directory(dir_path)) { + if (VERBOSE>5) + std::cout << "---- OutHDF5: checkpoint directory <" + << dir_path.string() << "> created successfully" + << std::endl; + okay = true; + } else { + if (VERBOSE>5) + std::cout << "---- OutHDF5: checkpoint directory <" + << dir_path.string() << "> creation failed" + << std::endl; + okay = false; + } + } + } + + MPI_Bcast(&okay, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + + if (not okay) + throw std::runtime_error + ("OutHDF5: Failed to create checkpoint directory " + + dir_path.string()); + + } + + // Create checkpoint filename + // + fname << "checkpoint_" << runtag; + if (numprocs>1) fname << "." 
<< myid+1; + + // Create backup filename + // + std::string currfile = chkpt_dir + fname.str(); + std::string backfile = currfile + ".bak"; + + // Remove old backup file + // + if (unlink(backfile.c_str())) { + if (VERBOSE>5) { + perror("---- OutHDF5::Run() perror"); + std::cout << "---- OutHDF5::Run(): error unlinking old backup file <" + << backfile << ">, it may not exist" << std::endl; + } + } else { + if (VERBOSE>5) { + std::cout << "---- OutHDF5::Run(): successfully unlinked <" + << backfile << ">" << std::endl; + } + } + + // Rename current file to backup file + // + if (rename(currfile.c_str(), backfile.c_str())) { + if (VERBOSE>5) { + perror("---- OutHDF5::Run() perror"); + std::cout << "---- OutHDF5: renaming backup file <" + << backfile << ">, it may not exist" << std::endl; + } + } else { + if (VERBOSE>5) { + std::cout << "---- OutHDF5::Run(): successfully renamed <" + << currfile << "> to <" << backfile << ">" << std::endl; + } + } + } + // Standard snapshot mode + else { + + // Create filename prefix + // + fname << filename << "_" << setw(5) << setfill('0') << nbeg++; + + if (directory) { + bool okay = true; + + // The prefix becomes the directory name + // + std::ostringstream dname; + dname << outdir << fname.str(); + + // Root process creates the directory + // + if (myid==0) { + std::filesystem::path dir_path = dname.str(); + + if (not std::filesystem::is_directory(dir_path)) { + if (std::filesystem::create_directory(dir_path)) { + if (VERBOSE>5) + std::cout << "HDF5 directory <" << dir_path.string() + << "> created successfully" << std::endl; + okay = true; + } else { + if (VERBOSE>5) + std::cout << "HDF5 directory <" << dir_path.string() + << "> creation failed" << std::endl; + okay = false; + } + } + } + + MPI_Bcast(&okay, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + + if (not okay) + throw std::runtime_error + ("OutHDF5: Failed to create HDF5 file directory " + dname.str()); + + h5_dir = fname.str() + "/"; + } + if (numprocs>1) fname << "." 
<< myid+1; + } + + // Full path + std::string path; + + if (directory) { // Append directory mode path + if (chkpt) path = chkpt_dir; + else path = outdir + h5_dir; + } + path += fname.str(); // Append filename + + if (gadget4) RunGadget4(path); + else RunPSP(path); + + chktimer.mark(); + if (VERBOSE>5) perror("---- OutHDF5::Run() perror"); + + dump_signal = 0; + + if (timer) { + end = std::chrono::high_resolution_clock::now(); + std::chrono::duration intvl = end - beg; + if (myid==0) + std::cout << "OutHDF5 [T=" << tnow << "] timing=" << intvl.count() + << std::endl; + } +} + +void OutHDF5::RunGadget4(const std::string& path) +{ + int nOK = 0; + + std::unique_ptr file; + + // Begin HDF5 file writing + // + try { + + // Silence the HDF5 error stack + // + HighFive::SilenceHDF5 quiet; + + // Try opening the file as HDF5 + // + file = std::make_unique(path, + HighFive::File::ReadWrite | + HighFive::File::Create); + try { + // Create a new group for Header + // + HighFive::Group header = file->createGroup("Header"); + + if (masses.size()==0) checkParticleMasses(); + header.createAttribute("MassTable", masses); + + std::vector nums(masses.size()); + { + int n=0; + for (auto c : comp->components) nums[n++] = c->Number(); + } + header.createAttribute("NumPart_ThisFile", nums); + header.createAttribute("Time", tnow); + + // Only attach metadata to the first file + // + if (allmeta or myid==0) { + + int dp = 1; + header.createAttribute("Flag_DoublePrecision", dp); + + double hubble = 1, zero = 0; + header.createAttribute("HubbleParam", hubble); + + header.createAttribute("Omega0", zero); + + header.createAttribute("OmegaBaryon", zero); + + header.createAttribute("OmegaLambda", zero); + + header.createAttribute("Redshift", zero); + + header.createAttribute("NumFilesPerSnapshot", numprocs); + + { + int n=0; + for (auto c : comp->components) nums[n++] = c->CurTotal(); + } + header.createAttribute("NumPart_Total", nums); + + // Create a new group for Config + // + 
HighFive::Group config = file->createGroup("Config"); + + int style = 0; + config.createAttribute("PSPstyle", style); + + int ntypes = comp->components.size(); + config.createAttribute("NTYPES", ntypes); + + config.createAttribute("DOUBLEPRECISION", dp); + + std::vector Niattrib, Ndattrib; + for (auto & c : comp->components) { + Niattrib.push_back(c->niattrib); + Ndattrib.push_back(c->ndattrib); + } + + config.createAttribute("Niattrib", Niattrib); + + config.createAttribute("Ndattrib", Ndattrib); + + // Create a new group for Parameters + // + HighFive::Group params = file->createGroup("Parameters"); + + std::string gcommit(GIT_COMMIT), gbranch(GIT_BRANCH), gdate(COMPILE_TIME); + params.createAttribute("Git_commit", gcommit); + params.createAttribute("Git_branch", gbranch); + params.createAttribute("Compile_date", gdate); + + std::vector names, forces, configs; + for (auto c : comp->components) { + names.push_back(c->name); + forces.push_back(c->id); + YAML::Emitter out; + out << c->fconf; // where node is your YAML::Node + configs.push_back(out.c_str()); + } + + params.createAttribute("ComponentNames", names); + params.createAttribute("ForceMethods", forces); + params.createAttribute("ForceConfigurations", configs); + + if (expconfig) { + // Save the entire EXP YAML configuration + YAML::Emitter cparse; + cparse << parse; + + std::string cyml(cparse.c_str()); + params.createAttribute("EXPConfiguration", cyml); + } + } + + } catch (HighFive::Exception& err) { + std::string msg("OutHDF5: error writing HDF5 file, "); + throw std::runtime_error(msg + err.what()); + } + + } catch (HighFive::Exception& err) { + std::cerr << "OutHDF5 [" << myid << "]: can't open file <" << path + << "> . . . 
quitting" << std::endl; + nOK = 1; + } + + MPI_Allreduce(MPI_IN_PLACE, &nOK, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + // Exit on file open failure + // + if (nOK) { + MPI_Finalize(); + exit(33); + } + + int count = 0; + for (auto c : comp->components) { + +#ifdef HAVE_LIBCUDA + if (use_cuda) { + if (c->force->cudaAware() and not comp->fetched[c]) { + comp->fetched[c] = true; + c->CudaToParticles(); + } + } +#endif + + std::ostringstream sout; + sout << "PartType" << count; + + auto pgroup = file->createGroup(sout.str()); + + if (real4) + c->write_HDF5(pgroup, multim[count], ids); + else + c->write_HDF5(pgroup, multim[count], ids); + + count++; + } +} + + +// Template to write a scalar attribute for types used here +template +void writeScalar(H5::Group& group, const std::string& name, T value) + { + // Create a dataspace for the attribute (scalar in this case) + H5::DataSpace space(H5S_SCALAR); + + H5::DataType type; + + // Create a int datatype + if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_INT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_ULONG; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_FLOAT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_DOUBLE; + } + else if constexpr (std::is_same_v) { + type = H5::StrType(0, H5T_VARIABLE); + } + else { + assert(0); + } + + // Create the attribute + H5::Attribute attribute = group.createAttribute(name, type, space); + + // Treat string separately + if constexpr (std::is_same_v) { + const char* strPtr = value.c_str(); + attribute.write(type, &strPtr); + } else + attribute.write(type, &value); + }; + + +// Template to write a std::vector attribute for types used here +template +void writeVector(H5::Group& group, const std::string& name, std::vector& value) + { + // Create a dataspace for the attribute + hsize_t attrDims[1] = {value.size()}; + H5::DataSpace attribute_space(1, attrDims); + + H5::DataType type; + + // Create a int 
datatype + if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_INT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_ULONG; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_FLOAT; + } + else if constexpr (std::is_same_v) { + type = H5::PredType::NATIVE_DOUBLE; + } + else if constexpr (std::is_same_v) { + type = H5::StrType(0, H5T_VARIABLE); + } + else { + assert(0); + } + + // Create the attribute + H5::Attribute attribute = group.createAttribute(name, type, attribute_space); + + // Treat string separately + if constexpr (std::is_same_v) { + std::vector strVec; + for (auto & v : value) strVec.push_back(v.c_str()); + attribute.write(type, strVec.data()); + } else + attribute.write(type, value.data()); + }; + + +void OutHDF5::RunPSP(const std::string& path) +{ + int nOK = 0; + + // Begin HDF5 file writing + // + try { + // Create a file + H5::H5File file(path.c_str(), write_flags); + + try { + + // Create a new group for Header + // + H5::Group header = file.createGroup("Header"); + + if (masses.size()==0) checkParticleMasses(); + writeVector(header, "MassTable", masses); + + std::vector nums(masses.size()); + { + int n=0; + for (auto c : comp->components) nums[n++] = c->Number(); + } + writeVector(header, "NumPart_ThisFile", nums); + + writeScalar(header, "Time", tnow); + + // Only attach metadata to first process + // + if (allmeta or myid==0) { + int dp = real4 ? 
0 : 1; + writeScalar(header, "Flag_DoublePrecision", dp); + + double hubble = 1, zero = 0; + + writeScalar(header, "HubbleParam", hubble); + writeScalar(header, "Omega0", zero ); + writeScalar(header, "OmegaBaryon", zero ); + writeScalar(header, "OmegaLambda", zero ); + writeScalar(header, "Redshift", zero ); + + writeScalar(header, "NumFilesPerSnapshot", numprocs); + + { + int n=0; + for (auto c : comp->components) nums[n++] = c->CurTotal(); + } + writeVector(header, "NumPart_Total", nums); + + // Create a new group for Config + // + H5::Group config = file.createGroup("Config"); + + int style = 1; + writeScalar(config, "PSPstyle", style); + + int ntypes = comp->components.size(); + writeScalar(config, "NTYPES", ntypes); + + writeScalar(config, "DOUBLEPRECISION", dp); + + std::vector Niattrib, Ndattrib; + for (auto & c : comp->components) { + Niattrib.push_back(c->niattrib); + Ndattrib.push_back(c->ndattrib); + } + writeVector(config, "Niattrib", Niattrib); + writeVector(config, "Ndattrib", Ndattrib); + + // Create a new group for Parameters + // + H5::Group params = file.createGroup("Parameters"); + + std::string gcommit(GIT_COMMIT), gbranch(GIT_BRANCH), gdate(COMPILE_TIME); + writeScalar(params, "Git_commit", gcommit); + writeScalar(params, "Git_branch", gbranch); + writeScalar(params, "Compile_date", gdate ); + + std::vector names, forces, configs; + for (auto c : comp->components) { + names.push_back(c->name); + forces.push_back(c->id); + YAML::Emitter out; + out << c->fconf; // where node is your YAML::Node + configs.push_back(out.c_str()); + } + + writeVector(params, "ComponentNames", names); + writeVector(params, "ForceMethods", forces); + writeVector(params, "ForceConfigurations", configs); + + if (expconfig) { + // Save the entire EXP YAML configuration + YAML::Emitter cparse; + cparse << parse; + + writeScalar(params, "EXPConfiguration", std::string(cparse.c_str())); + } + } + } catch (H5::Exception& error) { + throw 
std::runtime_error(std::string("OutHDF5: error writing HDF5 file ") + error.getDetailMsg()); + } + + int count = 0; + for (auto c : comp->components) { + +#ifdef HAVE_LIBCUDA + if (use_cuda) { + if (c->force->cudaAware() and not comp->fetched[c]) { + comp->fetched[c] = true; + c->CudaToParticles(); + } + } +#endif + + std::ostringstream sout; + sout << "PartType" << count; + + H5::Group group = file.createGroup(sout.str()); + + if (real4) + c->write_H5(group); + else + c->write_H5(group); + + count++; + } + } catch (H5::Exception& error) { + throw std::runtime_error(std::string("OutHDF5: error writing HDF5 file ") + error.getDetailMsg()); + } +} + +void OutHDF5::checkParticleMasses() +{ + for (auto c : comp->components) { + // First bunch of particles + int number = -1; + PartPtr *p = c->get_particles(&number); + + double minMass = std::numeric_limits::max(); + double maxMass = 0.0; + + // Keep going... + while (p) { + for (int k=0; kmass, minMass); + maxMass = std::max(P->mass, maxMass); + } + // Next bunch of particles + p = c->get_particles(&number); + } + + // Get the min and max mass for all processes + MPI_Allreduce(MPI_IN_PLACE, &minMass, 1, MPI_DOUBLE, MPI_MIN, + MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &maxMass, 1, MPI_DOUBLE, MPI_MAX, + MPI_COMM_WORLD); + + // All processes generate masses and multim from the globally + // reduced values + if ( (maxMass - minMass)/maxMass < 1.0e-12) { + masses.push_back(maxMass); + multim.push_back(true); + } else { + masses.push_back(0.0); + multim.push_back(false); + } + } + // END: component loop +} diff --git a/src/Output.cc b/src/Output.cc index b48744ded..10c4a84e9 100644 --- a/src/Output.cc +++ b/src/Output.cc @@ -4,6 +4,13 @@ Output::Output(const YAML::Node& CONF) : conf(CONF) { - nint = 50; // Default interval + // Default interval + nint = 50; nintsub = std::numeric_limits::max(); + + // Add keys + for (YAML::const_iterator it=conf.begin(); it!=conf.end(); ++it) { + current_keys.insert(it->first.as()); + } 
+ } diff --git a/src/OutputContainer.cc b/src/OutputContainer.cc index 8184d407f..189f7ee15 100644 --- a/src/OutputContainer.cc +++ b/src/OutputContainer.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,10 @@ void OutputContainer::initialize(void) out.push_back(new OutPSQ (node)); } + else if ( !name.compare("outhdf5") ) { + out.push_back(new OutHDF5 (node)); + } + else if ( !name.compare("outpsr") ) { out.push_back(new OutPSR (node)); } diff --git a/src/SphericalBasis.H b/src/SphericalBasis.H index cd949a96e..10b3a7899 100644 --- a/src/SphericalBasis.H +++ b/src/SphericalBasis.H @@ -232,6 +232,12 @@ protected: //! Flag self_consitency bool self_consistent; + //! Freeze monopole + bool FIX_L0; + + //! Frozen monopole coefficients + Eigen::VectorXd C0; + //! Flag fixed monopole bool NO_L0; diff --git a/src/SphericalBasis.cc b/src/SphericalBasis.cc index 6a0480147..4cde80982 100644 --- a/src/SphericalBasis.cc +++ b/src/SphericalBasis.cc @@ -31,6 +31,7 @@ SphericalBasis::valid_keys = { "rmin", "rmax", "self_consistent", + "FIX_L0", "NO_L0", "NO_L1", "EVEN_L", @@ -66,6 +67,7 @@ SphericalBasis::SphericalBasis(Component* c0, const YAML::Node& conf, MixtureBas mix = m; geometry = sphere; coef_dump = true; + FIX_L0 = false; NO_L0 = false; NO_L1 = false; EVEN_L = false; @@ -111,6 +113,7 @@ SphericalBasis::SphericalBasis(Component* c0, const YAML::Node& conf, MixtureBas } else self_consistent = true; + if (conf["FIX_L0"]) FIX_L0 = conf["FIX_L0"].as(); if (conf["NO_L0"]) NO_L0 = conf["NO_L0"].as(); if (conf["NO_L1"]) NO_L1 = conf["NO_L1"].as(); if (conf["EVEN_L"]) EVEN_L = conf["EVEN_L"].as(); @@ -1646,6 +1649,14 @@ void SphericalBasis::determine_acceleration_and_potential(void) } + // This should suffice for both CPU and GPU + if (FIX_L0) { + // Save the monopole coefficients on the first evaluation + if (C0.size() == 0) C0 = *expcoef[0]; + // Copy the saved coefficients to the active array + else *expcoef[0] = C0; + } + #if 
HAVE_LIBCUDA==1 if (use_cuda and cC->cudaDevice>=0 and cC->force->cudaAware()) { if (cudaAccelOverride) { diff --git a/src/user/CMakeLists.txt b/src/user/CMakeLists.txt index 1f7b4c85a..a419b9c64 100644 --- a/src/user/CMakeLists.txt +++ b/src/user/CMakeLists.txt @@ -12,6 +12,14 @@ set (common_INCLUDE_DIRS set (common_LINKLIB ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX exputil EXPlib yaml-cpp) +set(user_SRC UserTest.cc) +set(bar_SRC UserBar.cc) +set(logpot_SRC UserLogPot.cc) +set(disk_SRC UserDisk.cc) +set(halo_SRC UserHalo.cc) +set(mndisk_SRC UserMNdisk.cc) +set(mwgala_SRC UserMW.cc) + if(ENABLE_CUDA) list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) @@ -22,18 +30,8 @@ if(ENABLE_CUDA) # set_source_files_properties(UserBar.cc UserDisk.cc UserHalo.cc # UserLogPot.cc UserMNdisk.cc UserMW.cc UserTest.cc UserTestCuda.cc # PROPERTIES LANGUAGE CUDA) -endif() - -set(user_SRC UserTest.cc) -set(bar_SRC UserBar.cc) -set(logpot_SRC UserLogPot.cc) -set(disk_SRC UserDisk.cc) -set(halo_SRC UserHalo.cc) -set(mndisk_SRC UserMNdisk.cc) -set(mwgala_SRC UserMW.cc) - -if(ENABLE_CUDA) set(cudatest_SRC UserTestCuda.cc cudaUserTest.cu) + list(APPEND bar_SRC cudaUserBar.cu) endif() foreach(mlib ${USER_MODULES}) diff --git a/src/user/UserBar.H b/src/user/UserBar.H index 81136cd7f..6c07f33f0 100644 --- a/src/user/UserBar.H +++ b/src/user/UserBar.H @@ -24,13 +24,18 @@ class UserBar : public ExternalForce { private: - string angm_name, ctr_name; + std::string angm_name, ctr_name; Component *c0, *c1; void determine_acceleration_and_potential(void); void * determine_acceleration_and_potential_thread(void * arg); void initialize(); +#if HAVE_LIBCUDA==1 + //! 
Cuda implementation + void determine_acceleration_and_potential_cuda(); +#endif + double length, bratio, cratio, amplitude, Ton, Toff, DeltaT, Fcorot; bool fixed, soft; string filename; diff --git a/src/user/UserBar.cc b/src/user/UserBar.cc index db3f6eeed..458fe6e4b 100644 --- a/src/user/UserBar.cc +++ b/src/user/UserBar.cc @@ -30,7 +30,9 @@ UserBar::UserBar(const YAML::Node &conf) : ExternalForce(conf) ctr_name = ""; // Default component for com angm_name = ""; // Default component for angular momentum - initialize(); + initialize(); // Assign parameters + + cuda_aware = true; // Cuda routine is implemented if (ctr_name.size()>0) { // Look for the fiducial component @@ -175,10 +177,6 @@ void UserBar::determine_acceleration_and_potential(void) // Write to bar state file, if true bool update = false; -#if HAVE_LIBCUDA==1 // Cuda compatibility - getParticlesCuda(cC); -#endif - if (c1) { c1->get_angmom(); // Tell component to compute angular momentum // cout << "Lz=" << c1->angmom[2] << endl; // debug @@ -289,7 +287,6 @@ void UserBar::determine_acceleration_and_potential(void) << endl; } } - Iz = 0.2*mass*(a1*a1 + a2*a2); Lz = Iz * omega; @@ -390,7 +387,16 @@ void UserBar::determine_acceleration_and_potential(void) } } +#if HAVE_LIBCUDA==1 // Cuda compatibility + if (use_cuda) { + determine_acceleration_and_potential_cuda(); + } else { + getParticlesCuda(cC); // Cuda override + exp_thread_fork(false); + } +#else exp_thread_fork(false); +#endif if (myid==0 && update) { diff --git a/src/user/cudaUserBar.cu b/src/user/cudaUserBar.cu new file mode 100644 index 000000000..fc9ac7b88 --- /dev/null +++ b/src/user/cudaUserBar.cu @@ -0,0 +1,180 @@ +// -*- C++ -*- + +#include +#include + +#include +#include "UserBar.H" + +// Global device symbols for CUDA kernel +// +__device__ __constant__ +cuFP_t userBarAmp, userBarPosAng, userBarCen[3], userBarB5; + +__device__ __constant__ +int userBarSoft; + + +// Cuda implementation of bar force +// +__global__ void 
+userBarForceKernel(dArray P, dArray I, + int stride, PII lohi) +{ + const int tid = blockDim.x * blockIdx.x + threadIdx.x; + + for (int n=0; n=P._s) printf("out of bounds: %s:%d\n", __FILE__, __LINE__); +#endif + cudaParticle & p = P._v[I._v[npart]]; + + // Compute position relative to bar + cuFP_t rr = 0.0, fac, ffac, nn; + for (int k=0; k<3; k++) { + fpos[k] = p.pos[k] - userBarCen[k]; + rr += fpos[k] * fpos[k]; + } + rr = sqrt(rr); + + cuFP_t xx = fpos[0]; + cuFP_t yy = fpos[1]; + cuFP_t zz = fpos[2]; + cuFP_t pp = (xx*xx - yy*yy)*cos2p + 2.0*xx*yy*sin2p; + + if (userBarSoft) { + fac = 1.0 + rr/userBarB5; + ffac = -userBarAmp / pow(fac, 6.0); + nn = pp / (userBarB5*rr); + } else { + fac = 1.0 + pow(rr/userBarB5, 5.0); + ffac = -userBarAmp / (fac*fac); + nn = pp * pow(rr/userBarB5, 3.0)/ (userBarB5*userBarB5); + } + + // Add acceleration + p.acc[0] += ffac * ( 2.0*( xx*cos2p + yy*sin2p)*fac - 5.0*nn*xx); + p.acc[1] += ffac * ( 2.0*(-yy*cos2p + xx*sin2p)*fac - 5.0*nn*yy); + p.acc[2] += ffac * ( -5.0*nn*zz); + + // Add external potential + p.potext += -ffac*pp*fac; + + } // Particle index block + + } // END: stride loop + +} + + +__global__ +void testConstantsUserBar(cuFP_t tnow) +{ + printf("-------------------------\n"); + printf("---UserBar constants-----\n"); + printf("-------------------------\n"); + printf(" Time = %e\n", tnow ); + printf(" Amp = %e\n", userBarAmp ); + printf(" b5 = %e\n", userBarB5 ); + printf(" Center = %e, %e, %e\n", + userBarCen[0], userBarCen[1], userBarCen[2] ); + if (userBarSoft) printf(" Soft = true\n" ); + else printf(" Soft = false\n"); + printf("-------------------------\n"); +} + +void UserBar::determine_acceleration_and_potential_cuda() +{ + // Sanity check + // + int nbodies = cC->Number(); + if (nbodies != static_cast(cC->Particles().size())) { + std::cerr << "UserBar: ooops! 
number=" << nbodies + << " but particle size=" << cC->Particles().size() << endl; + nbodies = static_cast(cC->Particles().size()); + } + + double amp = afac * numfac * amplitude/fabs(amplitude) + * 0.5*(1.0 + erf( (tnow - Ton )/DeltaT )) + * 0.5*(1.0 - erf( (tnow - Toff)/DeltaT )) ; + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, cC->cudaDevice); + cuda_check_last_error_mpi("cudaGetDeviceProperties", __FILE__, __LINE__, myid); + + // Stream structure iterators + // + auto cr = cC->cuStream; + + // Assign expansion center + // + auto cn = cC->getCenter(); + cuFP_t ctr[3], dtmp; + for (int k=0; k<3; k++) ctr[k] = cn[k]; + + cuda_safe_call(cudaMemcpyToSymbol(userBarAmp, &(dtmp=amp), sizeof(cuFP_t), + size_t(0), cudaMemcpyHostToDevice), + __FILE__, __LINE__, "Error copying userBarAmp"); + + cuda_safe_call(cudaMemcpyToSymbol(userBarPosAng, &(dtmp=posang), sizeof(cuFP_t), + size_t(0), cudaMemcpyHostToDevice), + __FILE__, __LINE__, "Error copying userBarPosAng"); + + cuda_safe_call(cudaMemcpyToSymbol(userBarCen, ctr, sizeof(cuFP_t)*3, + size_t(0), cudaMemcpyHostToDevice), + __FILE__, __LINE__, "Error copying userBarCen"); + + cuda_safe_call(cudaMemcpyToSymbol(userBarB5, &(dtmp=b5), sizeof(cuFP_t), + size_t(0), cudaMemcpyHostToDevice), + __FILE__, __LINE__, "Error copying userBarB5"); + + int cuSoft = 0; + if (soft) cuSoft = 1; + cuda_safe_call(cudaMemcpyToSymbol(userBarSoft, &cuSoft, sizeof(int), + size_t(0), cudaMemcpyHostToDevice), + __FILE__, __LINE__, "Error copying userBarSoft"); + + // VERBOSE diagnostic output on first ncheck calls + // + static int cntr = 0; + const int ncheck = 100; + + if (myid==0 and VERBOSE>4 and cntr < ncheck) { + testConstantsUserBar<<<1, 1, 0, cr->stream>>>(tnow); + cudaDeviceSynchronize(); + cuda_check_last_error_mpi("cudaDeviceSynchronize", __FILE__, __LINE__, myid); + cntr++; + } + + // Get particle index range for levels [mlevel, multistep] + // + PII lohi = cC->CudaGetLevelRange(mlevel, multistep); + + // Compute 
grid + // + unsigned int N = lohi.second - lohi.first; + unsigned int stride = N/BLOCK_SIZE/deviceProp.maxGridSize[0] + 1; + unsigned int gridSize = N/BLOCK_SIZE/stride; + + if (N>0) { + + if (N > gridSize*BLOCK_SIZE*stride) gridSize++; + + // Do the work + // + userBarForceKernel<<stream>>> + (toKernel(cr->cuda_particles), toKernel(cr->indx1), stride, lohi); + } +} diff --git a/utils/ICs/CMakeLists.txt b/utils/ICs/CMakeLists.txt index 0c89b9942..7ac2198cc 100644 --- a/utils/ICs/CMakeLists.txt +++ b/utils/ICs/CMakeLists.txt @@ -2,10 +2,10 @@ set(bin_PROGRAMS shrinkics gensph gendisk gendisk2d gsphere pstmod empinfo empdump eofbasis eofcomp testcoefs testcoefs2 testdeval forcetest hdf52accel forcetest2 cylcache modelfit test2d addsphmod - cubeics zangics slabics) + cubeics zangics slabics addring) set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp exputil - ${VTK_LIBRARIES} ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} + expui ${VTK_LIBRARIES} ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} ${HDF5_CXX_LIBRARIES}) if(ENABLE_CUDA) @@ -26,6 +26,7 @@ set(common_INCLUDE $ $ $ + $ $ $ ${CMAKE_BINARY_DIR} ${DEP_INC} @@ -88,6 +89,8 @@ add_executable(zangics ZangICs.cc) add_executable(slabics genslab.cc massmodel1d.cc) +add_executable(addring addring.cc) + foreach(program ${bin_PROGRAMS}) target_link_libraries(${program} ${common_LINKLIB}) target_include_directories(${program} PUBLIC ${common_INCLUDE}) diff --git a/utils/ICs/Disk2dHalo.cc b/utils/ICs/Disk2dHalo.cc index f923e4792..ad0e9a61b 100644 --- a/utils/ICs/Disk2dHalo.cc +++ b/utils/ICs/Disk2dHalo.cc @@ -401,7 +401,7 @@ void Disk2dHalo::set_halo(vector& phalo, int nhalo, int npart) for (int i=0; igen_point(ierr); + std::tie(ps, ierr) = multi->gen_point(); if (ierr) count1++; } while (ierr); @@ -2329,7 +2329,7 @@ void Disk2dHalo::set_vel_halo(vector& part) // Use Eddington if (DF && 0.5*(1.0+erf((r-R_DF)/DR_DF)) > rndU(gen)) { - halo2->gen_velocity(&p.pos[0], &p.vel[0], nok); + nok = halo2->gen_velocity(&p.pos[0], &p.vel[0]); 
if (nok) { std::cout << "gen_velocity failed: " diff --git a/utils/ICs/DiskHalo.cc b/utils/ICs/DiskHalo.cc index c64a75c6a..0ff393af6 100644 --- a/utils/ICs/DiskHalo.cc +++ b/utils/ICs/DiskHalo.cc @@ -411,7 +411,7 @@ void DiskHalo::set_halo(vector& phalo, int nhalo, int npart) for (int i=0; igen_point(ierr); + std::tie(ps, ierr) = multi->gen_point(); if (ierr) count1++; } while (ierr); @@ -631,7 +631,7 @@ set_halo_coordinates(vector& phalo, int nhalo, int npart) p.pos[2] = r*costh; do { - ps = halo2->gen_point(ierr); + std::tie(ps, ierr) = halo2->gen_point(); } while (ierr); massp1 += p.mass; @@ -2560,7 +2560,8 @@ void DiskHalo::set_vel_halo(vector& part) // Use Eddington if (DF && 0.5*(1.0+erf((r-R_DF)/DR_DF)) > rndU(gen)) { - halo2->gen_velocity(&p.pos[0], &p.vel[0], nok); + + nok = halo2->gen_velocity(&p.pos[0], &p.vel[0]); if (nok) { std::cout << "gen_velocity failed: " diff --git a/utils/ICs/ZangICs.cc b/utils/ICs/ZangICs.cc index 638c6c755..660a7633c 100644 --- a/utils/ICs/ZangICs.cc +++ b/utils/ICs/ZangICs.cc @@ -27,6 +27,7 @@ main(int ac, char **av) //===================== int N; // Number of particles + int Nrepl; // Number of particle replicates per orbit double mu, nu, Ri, Ro; // Taper paramters double Rmin, Rmax; // Radial range double sigma; // Velocity dispersion @@ -59,6 +60,8 @@ main(int ac, char **av) cxxopts::value(sigma)->default_value("1.0")) ("s,seed", "Random number seed. 
Default: use /dev/random", cxxopts::value(seed)) + ("q,Nrepl", "Number of particle replicates per orbit", + cxxopts::value(Nrepl)->default_value("1")) ("f,file", "Output body file", cxxopts::value(bodyfile)->default_value("zang.bods")) ; @@ -84,6 +87,10 @@ main(int ac, char **av) seed = std::random_device{}(); } + // Set particle number consitent with even replicants + if (Nrepl<1) Nrepl = 1; + if (Nrepl>1) N = (N/Nrepl)*Nrepl; + // Make sure N>0 if (N<=0) { std::cerr << av[0] << ": you must requiest at least one body" @@ -107,6 +114,11 @@ main(int ac, char **av) 1.0, Rmin, Rmax); model->setup_df(sigma); + // Replicant logic + // + int Number = N/Nrepl; + double dPhi = 2.0*M_PI/Nrepl; + // Progress bar // std::shared_ptr progress; @@ -115,7 +127,7 @@ main(int ac, char **av) { nomp = omp_get_num_threads(); if (omp_get_thread_num()==0) { - progress = std::make_shared(N/nomp); + progress = std::make_shared(Number/nomp); } } @@ -190,7 +202,7 @@ main(int ac, char **av) // Generation loop with OpenMP // #pragma omp parallel for reduction(+:over) - for (int n=0; n M_PI) vr *= -1.0; // Branch of radial motion - // Convert from polar to Cartesian - // - pos[n][0] = r*cos(phi); - pos[n][1] = r*sin(phi); - pos[n][2] = 0.0; - - vel[n][0] = vr*cos(phi) - vt*sin(phi); - vel[n][1] = vr*sin(phi) + vt*cos(phi); - vel[n][2] = 0.0; - - // Accumulate mean position and velocity - // - for (int k=0; k<3; k++) { - zeropos[tid][k] += pos[n][k]; - zerovel[tid][k] += vel[n][k]; + for (int nn=0; nn +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#ifdef HAVE_OMP_H +#include +#endif + +// EXP classes +// +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // Library globals +#include // Command-line parsing +#include // Ini-style config + + // Local headers +#include +#include + +int +main(int ac, char **av) +{ + //==================== + // Inialize MPI stuff + 
//==================== + + local_init_mpi(ac, av); + + //==================== + // Begin opt parsing + //==================== + + std::string halofile; + int DIVERGE, N; + double DIVERGE_RFAC, mass, radius, width, vdisp; + std::string outfile, config; + + const std::string mesg("Make a model from the combination of two spherical models. E.g. a DM halo and a bulge.\n"); + + cxxopts::Options options(av[0], mesg); + + options.add_options() + ("h,help", "Print this help message") + ("s,spline", "Set interplation in SphericalModelTable to 'spline' (default: linear)") + ("T,template", "Write template options file with current and all default values") + ("c,config", "Parameter configuration file", + cxxopts::value(config)) + ("N,number", "Number of particles", + cxxopts::value(N)->default_value("100000")) + ("DIVERGE", "Cusp power-law density extrapolation (0 means off)", + cxxopts::value(DIVERGE)->default_value("0")) + ("DIVERGE_RFAC", "Exponent for inner cusp extrapolation", + cxxopts::value(DIVERGE_RFAC)->default_value("1.0")) + ("i,halofile", "Halo spherical model table", + cxxopts::value(halofile)->default_value("SLGridSph.one")) + ("o,outfile", "Output particle file", + cxxopts::value(outfile)->default_value("addring.bods")) + ("M,mass", "Mass of ring", + cxxopts::value(mass)->default_value("0.1")) + ("R,radius", "Radius of ring", + cxxopts::value(radius)->default_value("1.0")) + ("W,width", "Width of ring", + cxxopts::value(width)->default_value("0.1")) + ("V,disp", "Velocity dispersion", + cxxopts::value(vdisp)->default_value("0.1")) + ; + + cxxopts::ParseResult vm; + + try { + vm = options.parse(ac, av); + } catch (cxxopts::OptionException& e) { + if (myid==0) std::cout << "Option error: " << e.what() << std::endl; + MPI_Finalize(); + exit(-1); + } + + // Write YAML template config file and exit + // + if (vm.count("template")) { + // Write template file + // + if (myid==0) SaveConfig(vm, options, "template.yaml"); + + MPI_Finalize(); + return 0; + } + + // Print 
help message and exit + // + if (vm.count("help")) { + if (myid == 0) { + std::cout << options.help() << std::endl << std::endl + << "Examples: " << std::endl + << "\t" << "Use parameters read from a YAML config file" << std::endl + << "\t" << av[0] << " --config=addsphmod.config" << std::endl << std::endl + << "\t" << "Generate a template YAML config file from current defaults" << std::endl + << "\t" << av[0] << " --template=template.yaml" << std::endl << std::endl; + } + MPI_Finalize(); + return 0; + } + + // Read parameters from the YAML config file + // + if (vm.count("config")) { + try { + vm = LoadConfig(options, config); + } catch (cxxopts::OptionException& e) { + if (myid==0) std::cout << "Option error in configuration file: " + << e.what() << std::endl; + MPI_Finalize(); + return 0; + } + } + + if (vm.count("spline")) { + SphericalModelTable::linear = 0; + } else { + SphericalModelTable::linear = 1; + } + + // Open output file + // + std::ofstream out(outfile); + if (not out) throw std::runtime_error("Cannot open output file " + outfile); + + // Model + // + SphericalModelTable model(halofile, DIVERGE, DIVERGE_RFAC); + + + // Random setup + // + std::random_device rd{}; + std::mt19937 gen{rd()}; + + // Unit normal distribution + // + std::normal_distribution<> d{0.0, 1.0}; + std::uniform_real_distribution<> u{0.0, 1.0}; + + + double pmass = mass/N; + Eigen::Vector3d pos, vel; + + for (int i=0; i
-//=========================================== - -extern void mpi_print_trace(const string& routine, const string& msg, - const char *file, int line); - -extern void mpi_gdb_print_trace(int sig); - -extern void mpi_gdb_wait_trace(int sig); - -//=========================================== -// A signal handler to trap invalid FP only -//=========================================== - -void set_fpu_invalid_handler(void) -{ - // Flag invalid FP results only, such as 0/0 or infinity - infinity - // or sqrt(-1). - // - feenableexcept(FE_INVALID); - // - // Print enabled flags to root node - // - if (myid==0) { - const std::list> flags = - { {FE_DIVBYZERO, "divide-by-zero"}, - {FE_INEXACT, "inexact"}, - {FE_INVALID, "invalid"}, - {FE_OVERFLOW, "overflow"}, - {FE_UNDERFLOW, "underflow"} }; - - int _flags = fegetexcept(); - std::cout << "Enabled FE flags: <"; - for (auto v : flags) { - if (v.first & _flags) std::cout << v.second << ' '; - } - std::cout << "\b>" << std::endl; - } - signal(SIGFPE, mpi_gdb_print_trace); -} - -//=========================================== -// A signal handler to produce a traceback -//=========================================== - -void set_fpu_trace_handler(void) -{ - // Flag all FP errors except inexact - // - // fedisableexcept(FE_ALL_EXCEPT & ~FE_INEXACT); - - // Flag invalid FP results only, such as 0/0 or infinity - infinity - // or sqrt(-1). 
- // - feenableexcept(FE_INVALID); - // - // Print enabled flags to root node - // - if (myid==0) { - const std::list> flags = - { {FE_DIVBYZERO, "divide-by-zero"}, - {FE_INEXACT, "inexact"}, - {FE_INVALID, "invalid"}, - {FE_OVERFLOW, "overflow"}, - {FE_UNDERFLOW, "underflow"} }; - - int _flags = fegetexcept(); - std::cout << "Enabled FE flags: <"; - for (auto v : flags) { - if (v.first & _flags) std::cout << v.second << ' '; - } - std::cout << "\b>" << std::endl; - } - signal(SIGFPE, mpi_gdb_print_trace); -} - -//=========================================== -// A signal handler to produce stop and wait -//=========================================== - -void set_fpu_gdb_handler(void) -{ - // Flag all FP errors except inexact - // - // fedisableexcept(FE_ALL_EXCEPT & ~FE_INEXACT); - - // Flag invalid FP results only, such as 0/0 or infinity - infinity - // or sqrt(-1). - // - feenableexcept(FE_INVALID); - // - // Print enabled flags to root node - // - if (myid==0) { - const std::list> flags = - { {FE_DIVBYZERO, "divide-by-zero"}, - {FE_INEXACT, "inexact"}, - {FE_INVALID, "invalid"}, - {FE_OVERFLOW, "overflow"}, - {FE_UNDERFLOW, "underflow"} }; - - int _flags = fegetexcept(); - std::cout << "Enabled FE flags: <"; - for (auto v : flags) { - if (v.first & _flags) std::cout << v.second << ' '; - } - std::cout << "\b>" << std::endl; - } - signal(SIGFPE, mpi_gdb_wait_trace); -} - -#endif - - // Local headers -#include - - -// Global variables -// -enum DiskType { constant, gaussian, mn, exponential, doubleexpon, diskbulge, python }; - -std::map dtlookup = - { {"constant", DiskType::constant}, - {"gaussian", DiskType::gaussian}, - {"mn", DiskType::mn}, - {"exponential", DiskType::exponential}, - {"doubleexpon", DiskType::doubleexpon}, - {"diskbulge", DiskType::diskbulge}, - {"python", DiskType::python} - }; - -DiskType DTYPE; -std::string pyname; -double ASCALE; -double ASHIFT; -double HSCALE; -double HERNA; -double Mfac; -double RTRUNC = 1.0; -double RWIDTH = 0.0; -double 
ARATIO = 1.0; -double HRATIO = 1.0; -double DWEIGHT = 1.0; - -#include - -static std::shared_ptr pydens; - -double DiskDens(double R, double z, double phi) -{ - double ans = 0.0; - - switch (DTYPE) { - - case DiskType::constant: - if (R < ASCALE && fabs(z) < HSCALE) - ans = 1.0/(2.0*HSCALE*M_PI*ASCALE*ASCALE); - break; - - case DiskType::gaussian: - if (fabs(z) < HSCALE) - ans = 1.0/(2.0*HSCALE*2.0*M_PI*ASCALE*ASCALE)* - exp(-R*R/(2.0*ASCALE*ASCALE)); - break; - - case DiskType::mn: - { - double Z2 = z*z + HSCALE*HSCALE; - double Z = sqrt(Z2); - double Q2 = (ASCALE + Z)*(ASCALE + Z); - ans = 0.25*HSCALE*HSCALE/M_PI*(ASCALE*R*R + (ASCALE + 3.0*Z)*Q2)/( pow(R*R + Q2, 2.5) * Z*Z2 ); - } - break; - - case DiskType::doubleexpon: - { - double a1 = ASCALE; - double a2 = ASCALE*ARATIO; - double h1 = HSCALE; - double h2 = HSCALE*HRATIO; - double w1 = 1.0/(1.0+DWEIGHT); - double w2 = DWEIGHT/(1.0+DWEIGHT); - - double f1 = cosh(z/h1); - double f2 = cosh(z/h2); - - ans = - w1*exp(-R/a1)/(4.0*M_PI*a1*a1*h1*f1*f1) + - w2*exp(-R/a2)/(4.0*M_PI*a2*a2*h2*f2*f2) ; - } - break; - case DiskType::diskbulge: - { - double f = cosh(z/HSCALE); - double rr = pow(pow(R, 2) + pow(z,2), 0.5); - double w1 = Mfac; - double w2 = (1-Mfac); - double as = HERNA; - - ans = w1*exp(-R/ASCALE)/(4.0*M_PI*ASCALE*ASCALE*HSCALE*f*f) + - w2*pow(as, 4)/(2.0*M_PI*rr)*pow(rr+as,-3.0) ; - } - break; - case DiskType::python: - { - if (not pydens) pydens = std::make_shared(pyname); - ans = (*pydens)(R, z, phi); - } - break; - case DiskType::exponential: - default: - { - double f = cosh(z/HSCALE); - ans = exp(-R/ASCALE)/(4.0*M_PI*ASCALE*ASCALE*HSCALE*f*f); - } - break; - } - - if (RWIDTH>0.0) ans *= erf((RTRUNC-R)/RWIDTH); - - return ans; -} - -double dcond(double R, double z, double phi, int M) -{ - // - // No shift for M==0 - // - if (M==0) return DiskDens(R, z, phi); - - // - // Fold into [-PI/M, PI/M] for M>=1 - // - double dmult = M_PI/M, phiS; - if (phi>M_PI) - phiS = phi + dmult*(int)((2.0*M_PI - phi)/dmult); 
- else - phiS = phi - dmult*(int)(phi/dmult); - - // - // Apply a shift along the x-axis - // - double x = R*cos(phiS) - ASHIFT*ASCALE; - double y = R*sin(phiS); - - return DiskDens(sqrt(x*x + y*y), z, atan2(y, x)); -} - int main(int ac, char **av) { - //==================== - // Inialize MPI stuff - //==================== + // Parameters + // + int nthrds = 1; + std::string config; + + // Default/example YAML file for disk + // + std::string disk_config = + "id : cylinder\n" + "parameters :\n" + " acyl : 1.0\n" + " hcyl : 0.1\n" + " lmaxfid : 48\n" + " nmaxfid : 48\n" + " mmax : 10\n" + " nmax : 32\n" + " ncylodd : 6\n" + " ncylnx : 256\n" + " ncylny : 128\n" + " ncylr : 2000\n" + " rnum : 200\n" + " pnum : 1\n" + " tnum : 80\n" + " rcylmin : 0.001\n" + " rcylmax : 20\n" + " ashift : 0\n" + " logr : true\n" + " expcond : true\n" + " deproject : true\n" + " cachename : eof.cache.file_new\n" + " vflag : 5\n" + ; + // Inialize MPI stuff + // local_init_mpi(ac, av); - //==================== - // Begin opt parsing - //==================== - - double RCYLMIN; - double RCYLMAX; - double RFACTOR; - int RNUM; - int PNUM; - int TNUM; - int VFLAG; - bool expcond; - bool LOGR; - int NUMR; - int CMAPR; - int CMAPZ; - int CMTYPE; - int NMAXFID; - int LMAXFID; - int MMAX; - int NUMX; - int NUMY; - int NOUT; - int NMAX; - int NCYLODD; - double PPower; - std::string cachefile; - std::string config; - std::string dtype; - std::string dmodel; - std::string mtype; - std::string ctype; - - const std::string mesg("Generates an EmpCylSL cache\n"); + const std::string mesg("Generates an EmpCylSL cache from a YAML configuration file\n"); cxxopts::Options options(av[0], mesg); options.add_options() ("h,help", "Print this help message") - ("T,template", "Write template options file with current and all default values") - ("c,config", "Parameter configuration file", - cxxopts::value(config)) - ("deproject", "The EmpCylSL deprojection from specified disk model (EXP or MN)", - 
cxxopts::value(dmodel)->default_value("EXP")) - ("cachefile", "The cache file for the cylindrical basis", - cxxopts::value(cachefile)->default_value(".eof.cache.file")) - ("ctype", "DiskHalo radial coordinate scaling type (one of: Linear, Log, Rat)", - cxxopts::value(ctype)->default_value("Log")) - ("LMAXFID", "Maximum angular order for spherical basis in adaptive construction of the cylindrical basis", - cxxopts::value(LMAXFID)->default_value("32")) - ("NMAXFID", "Maximum radial order for the spherical basis in adapative construction of the cylindrical basis", - cxxopts::value(NMAXFID)->default_value("32")) - ("MMAX", "Maximum azimuthal order for the cylindrical basis", - cxxopts::value(MMAX)->default_value("6")) - ("NUMX", "Size of the (mapped) cylindrical radial grid", - cxxopts::value(NUMX)->default_value("256")) - ("NUMY", "Size of the (mapped) cylindrical vertical grid", - cxxopts::value(NUMY)->default_value("128")) - ("dtype", "Spherical model type for adpative basis creation", - cxxopts::value(dtype)->default_value("exponential")) - ("NCYLODD", "Number of vertically odd basis functions per harmonic order", - cxxopts::value(NCYLODD)->default_value("6")) - ("NMAX", "Total number of basis functions per harmonic order", - cxxopts::value(NMAX)->default_value("20")) - ("VFLAG", "Diagnostic flag for EmpCylSL", - cxxopts::value(VFLAG)->default_value("31")) - ("expcond", "Use analytic target density rather than particle distribution", - cxxopts::value(expcond)->default_value("true")) - ("LOGR", "Logarithmic scaling for model table in EmpCylSL", - cxxopts::value(LOGR)->default_value("true")) - ("RCYLMIN", "Minimum disk radius for EmpCylSL", - cxxopts::value(RCYLMIN)->default_value("0.001")) - ("RCYLMAX", "Maximum disk radius for EmpCylSL", - cxxopts::value(RCYLMAX)->default_value("20.0")) - ("ASCALE", "Radial scale length for disk basis construction", - cxxopts::value(ASCALE)->default_value("1.0")) - ("ASHIFT", "Fraction of scale length for shift in conditioning 
function", - cxxopts::value(ASHIFT)->default_value("0.0")) - ("HSCALE", "Vertical scale length for disk basis construction", - cxxopts::value(HSCALE)->default_value("0.1")) - ("ARATIO", "Radial scale length ratio for disk basis construction with doubleexpon", - cxxopts::value(ARATIO)->default_value("1.0")) - ("HRATIO", "Vertical scale height ratio for disk basis construction with doubleexpon", - cxxopts::value(HRATIO)->default_value("1.0")) - ("NUMR", "Size of radial grid", - cxxopts::value(NUMR)->default_value("2000")) - ("PPOW", "Power-law index for power-law disk profile", - cxxopts::value(PPower)->default_value("4.0")) - ("DWEIGHT", "Ratio of second disk relative to the first disk for disk basis construction with double-exponential", - cxxopts::value(DWEIGHT)->default_value("1.0")) - ("Mfac", "Mass fraction of the disk for diskbulge model, sets disk/bulge mass (e.g. 0.75 -> 75% of mass in disk, 25% of mass in bulge)", - cxxopts::value(Mfac)->default_value("1.0")) - ("HERNA", "Hernquist scale a parameter in diskbulge model", - cxxopts::value(HERNA)->default_value("0.10")) - ("RTRUNC", "Maximum disk radius for erf truncation of EOF conditioning density", - cxxopts::value(RTRUNC)->default_value("0.1")) - ("RWIDTH", "Width for erf truncation for EOF conditioning density (ignored if zero)", - cxxopts::value(RWIDTH)->default_value("0.0")) - ("RFACTOR", "Disk radial scaling factor for spherical deprojection model", - cxxopts::value(RFACTOR)->default_value("1.0")) - ("RNUM", "Number of radial knots for EmpCylSL basis construction quadrature", - cxxopts::value(RNUM)->default_value("200")) - ("PNUM", "Number of azimthal knots for EmpCylSL basis construction quadrature", - cxxopts::value(PNUM)->default_value("1")) - ("TNUM", "Number of cos(theta) knots for EmpCylSL basis construction quadrature", - cxxopts::value(TNUM)->default_value("80")) - ("CMAPR", "Radial coordinate mapping type for cylindrical grid (0=none, 1=rational fct)", - 
cxxopts::value(CMAPR)->default_value("1")) - ("CMAPZ", "Vertical coordinate mapping type for cylindrical grid (0=none, 1=rational fct)", - cxxopts::value(CMAPZ)->default_value("1")) - ("pyname", "The name of the Python module supplying the disk_density function", - cxxopts::value(pyname)) + ("T,template", "Write template YAML config file") + ("c,config", "Read a configuration file", cxxopts::value(config)) + ("t,threads", "Number of OpenMP threads", cxxopts::value(nthrds)) ; cxxopts::ParseResult vm; @@ -430,29 +81,25 @@ main(int ac, char **av) // Write YAML template config file and exit // if (vm.count("template")) { - NOUT = NMAX;//std::min(NOUT, NMAX); - - // Write template file - // - if (myid==0) SaveConfig(vm, options, "template.yaml"); - + if (myid==0) { + std::ofstream out("template.yaml"); + if (out) { + out << disk_config; + std::cout << "Wrote . Please adjust to your preferences" + << std::endl; + } else { + std::cerr << "Error opening for output" << std::endl; + } + } MPI_Finalize(); return 0; } - // Print help message and exit // if (vm.count("help")) { if (myid == 0) { - std::cout << options.help() << std::endl << std::endl - << "Examples: " << std::endl - << "\t" << "Use parameters read from a config file in INI style" << std::endl - << "\t" << av[0] << " --config=gendisk.config" << std::endl << std::endl - << "\t" << "Generate a template YAML config file current defaults called " << std::endl - << "\t" << av[0] << " --template" << std::endl << std::endl - << "\t" << "Override a single parameter in a config file from the command line" << std::endl - << "\t" << av[0] << "--LMAX=8 --config=my.config" << std::endl << std::endl; + std::cout << options.help() << std::endl << std::endl; } MPI_Finalize(); return 0; @@ -462,78 +109,25 @@ main(int ac, char **av) // if (vm.count("config")) { try { - vm = LoadConfig(options, config); - } catch (cxxopts::OptionException& e) { - if (myid==0) std::cout << "Option error in configuration file: " + auto name = 
vm["config"].as(); + auto size = std::filesystem::file_size(name); + std::string content(size, '\0'); + std::ifstream in(name); + in.read(&content[0], size); + disk_config = content; + } catch (std::runtime_error& e) { + if (myid==0) std::cout << "Error reading configuration file: " << e.what() << std::endl; MPI_Finalize(); return 0; } } - // Convert mtype string to lower case - // - std::transform(mtype.begin(), mtype.end(), mtype.begin(), - [](unsigned char c){ return std::tolower(c); }); - - // Set EmpCylSL mtype. This is the spherical function used to - // generate the EOF basis. If "deproject" is set, this will be - // overriden in EmpCylSL. - // - EmpCylSL::mtype = EmpCylSL::Exponential; - if (vm.count("mtype")) { - if (mtype.compare("exponential")==0) - EmpCylSL::mtype = EmpCylSL::Exponential; - else if (mtype.compare("gaussian")==0) - EmpCylSL::mtype = EmpCylSL::Gaussian; - else if (mtype.compare("plummer")==0) - EmpCylSL::mtype = EmpCylSL::Plummer; - else if (mtype.compare("power")==0) { - EmpCylSL::mtype = EmpCylSL::Power; - EmpCylSL::PPOW = PPower; - } else { - if (myid==0) std::cout << "No EmpCylSL EmpModel named <" - << mtype << ">, valid types are: " - << "Exponential, Gaussian, Plummer, Power " - << "(not case sensitive)" << std::endl; - MPI_Finalize(); - return 0; - } - } - - // Convert dtype string to lower case + // Explicit OpenMP control; otherwise, fall back to the environment + // setting OMP_NUM_THREADS. // - std::transform(dtype.begin(), dtype.end(), dtype.begin(), - [](unsigned char c){ return std::tolower(c); }); - - - // Set DiskType. This is the functional form for the disk used to - // condition the basis. - // - try { // Check for map entry, will through if the - DTYPE = dtlookup.at(dtype); // key is not in the map. 
- - if (myid==0) // Report DiskType - std::cout << "---- DiskType is <" << dtype << ">" << std::endl; - } - catch (const std::out_of_range& err) { - if (myid==0) { - std::cout << "DiskType error in configuraton file" << std::endl; - std::cout << "Valid options are: "; - for (auto v : dtlookup) std::cout << v.first << " "; - std::cout << std::endl; - } - MPI_Finalize(); - return 0; - } - - - //==================== - // OpenMP control - //==================== - #ifdef HAVE_OMP_H - omp_set_num_threads(nthrds); + if (vm.count("threads")) omp_set_num_threads(nthrds); #pragma omp parallel if (myid==0) { int numthrd = omp_get_num_threads(); @@ -541,102 +135,26 @@ main(int ac, char **av) } #endif - - //==================== - // Okay, now begin ... - //==================== - -#ifdef DEBUG // For gdb . . . - sleep(20); - // set_fpu_handler(); // Make gdb trap FPU exceptions - set_fpu_gdb_handler(); // Make gdb trap FPU exceptions -#endif - - - - //===========================Cylindrical expansion=========================== - - - EmpCylSL::RMIN = RCYLMIN; - EmpCylSL::RMAX = RCYLMAX; - EmpCylSL::NUMX = NUMX; - EmpCylSL::NUMY = NUMY; - EmpCylSL::NUMR = NUMR; - EmpCylSL::NOUT = NMAX; - EmpCylSL::CMAPR = CMAPR; - EmpCylSL::CMAPZ = CMAPZ; - EmpCylSL::VFLAG = VFLAG; - EmpCylSL::logarithmic = LOGR; - - // Create expansion only if needed . . . 
- std::shared_ptr expandd; - - // - expandd = std::make_shared(NMAXFID, LMAXFID, MMAX, NMAX, ASCALE, HSCALE, NCYLODD, cachefile); - -#ifdef DEBUG - std::cout << "Process " << myid << ": " - << " rmin=" << EmpCylSL::RMIN - << " rmax=" << EmpCylSL::RMAX - << " a=" << ASCALE - << " h=" << HSCALE - << " as=" << HERNA - << " Mfac=" << Mfac - << " nmax2=" << NMAXFID - << " lmax2=" << LMAXFID - << " mmax=" << MMAX - << " nordz=" << NMAX - << " noddz=" << NCYLODD - << std::endl << std::flush; -#endif - - - // Use these user models to deproject for the EOF spherical basis + // Begin calculation and start stopwatch // - if (vm.count("deproject")) { - // The scale in EmpCylSL is assumed to be 1 so we compute the - // height relative to the length - // - double H = HSCALE/ASCALE; - - // The model instance (you can add others in DiskModels.H). - // It's MN or Exponential if not MN. - // - EmpCylSL::AxiDiskPtr model; - - if (dmodel.compare("MN")==0) // Miyamoto-Nagai - model = std::make_shared(1.0, H); - else // Default to exponential - model = std::make_shared(1.0, H); - - if (RWIDTH>0.0) { - model = std::make_shared(RTRUNC/ASCALE, - RWIDTH/ASCALE, - model); - if (myid==0) - std::cout << "Made truncated model with R=" << RTRUNC/ASCALE - << " and W=" << RWIDTH/ASCALE << std::endl; - } + auto t_start = MPI_Wtime(); - expandd->create_deprojection(H, RFACTOR, NUMR, RNUM, model); - } - - // Regenerate EOF from analytic density + // Construct the basis instance // - expandd->generate_eof(RNUM, PNUM, TNUM, dcond); + auto disk_basis = BasisClasses::Basis::factory_string(disk_config); + + MPI_Barrier(MPI_COMM_WORLD); - // Basis orthgonality check + // DONE // - if (vm.count("ortho")) { - std::ofstream out(runtag + ".ortho_check"); - expandd->ortho_check(out); - } + if (myid==0) + std::cout << std::string(60, '=') << std::endl + << "Calculation finished in " << std::setprecision(3) + << MPI_Wtime() - t_start << " seconds" << std::endl + << std::string(60, '=') << std::endl; - 
//=========================================================================== // Shutdown MPI - //=========================================================================== - - MPI_Barrier(MPI_COMM_WORLD); + // MPI_Finalize(); return 0; diff --git a/utils/ICs/gensph.cc b/utils/ICs/gensph.cc index b42b397bc..045d48f28 100644 --- a/utils/ICs/gensph.cc +++ b/utils/ICs/gensph.cc @@ -61,13 +61,16 @@ using namespace __EXP__; #include #endif +#include + // Global variables int main(int argc, char **argv) { int HMODEL, N, NUMDF, NUMR, NUMJ, NUME, NUMG, NREPORT, SEED, ITMAX; - int NUMMODEL, RNUM, DIVERGE, DIVERGE2, LINEAR, NUMINT, NI, ND; + int NUMMODEL, RNUM, DIVERGE, DIVERGE2, LINEAR, NUMINT, NI, ND; + int Nrepl=1, Nfib=1; double DIVERGE_RFAC, DIVERGE_RFAC2, NN, MM, RA, RMODMIN, RMOD, EPS; double X0, Y0, Z0, U0, V0, W0, TOLE; double Emin0, Emax0, Kmin0, Kmax0, RBAR, MBAR, BRATIO, CRATIO, SMOOTH; @@ -75,6 +78,8 @@ main(int argc, char **argv) bool VTEST; std::string INFILE, MMFILE, OUTFILE, OUTPS, config; + const double goldenRatio = 1.618033988749895; + #ifdef DEBUG set_fpu_handler(); #endif @@ -179,6 +184,10 @@ main(int argc, char **argv) cxxopts::value(ELIMIT)->default_value("false")) ("VTEST", "Test gen_velocity() generation", cxxopts::value(VTEST)->default_value("false")) + ("Nrepl", "Number of replicates in orbital plane (1 skips the Sellwood 1997 algorithm)", + cxxopts::value(Nrepl)->default_value("1")) + ("Nfib", "Number of points on the sphere for each orbit. Replicate the orbits by tiling the angular momentum direction on the sphere using a Fibonnaci sequence. 
Default value of 1 implies one plane per orbit.", + cxxopts::value(Nfib)->default_value("1")) ("Emin0", "Minimum energy (if ELIMIT=true)", cxxopts::value(Emin0)->default_value("-3.0")) ("Emax0", "Maximum energy (if ELIMIT=true)", @@ -254,6 +263,11 @@ main(int argc, char **argv) if (vm.count("verbose")) VERBOSE = true; else VERBOSE = false; + // Use Fibonnaci for space-filling grid points + // + Nfib = std::max(1, Nfib ); + Nrepl = std::max(1, Nrepl); + // Prepare output streams and create new files // std::ostringstream sout; @@ -520,6 +534,22 @@ main(int argc, char **argv) } + int nfib=0, nangle=0, nshell=0; + + // For replication algorithm; Nrepl=1 is the standard algorithm. + // + Nrepl *= Nfib; + int nplane = N/Nrepl; + N = Nrepl * nplane; + NREPORT = std::max(1, NREPORT/Nrepl); + + if (Nrepl > 1 and myid==0) { + std::cout << std::setw(60) << std::setfill('-') << '-' + << std::setfill(' ') << std::endl + << "Replication: N=" << N << " Nrepl=" << Nrepl/Nfib + << " Nfib=" << Nfib << " Norbits=" << nplane << std::endl; + } + double mass = hmodel->get_mass(hmodel->get_max_radius())/N; AxiSymModPtr rmodel = hmodel; @@ -691,73 +721,195 @@ main(int argc, char **argv) double TT=0.0, WW=0.0, VC=0.0; if (myid==0) - std::cout << "-----------" << std::endl + std::cout << std::setw(60) << std::setfill('-') << '-' << std::endl + << std::setfill('-') << "Body count:" << std::endl; - int npernode = N/numprocs; + int npernode = nplane/numprocs; int beg = myid*npernode; int end = beg + npernode; - if (myid==numprocs-1) end = N; + if (myid==numprocs-1) end = nplane; std::vector PS; Eigen::VectorXd zz = Eigen::VectorXd::Zero(7); + std::vector rmass; + for (int n=beg; ngen_point(Emin0, Emax0, Kmin0, Kmax0, ierr); + std::tie(ps, ierr) = rmodel->gen_point(Emin0, Emax0, Kmin0, Kmax0); else if (VTEST) { - ps = rmodel->gen_point(ierr); - rmodel->gen_velocity(&ps[1], &ps[4], ierr); - if (ierr) { - std::cout << "gen_velocity failed: " - << ps[0] << " " - << ps[1] << " " - << ps[2] << 
"\n"; + std::tie(ps, ierr) = rmodel->gen_point(); + if (ierr==0) { + ierr = rmodel->gen_velocity(&ps[1], &ps[4]); + if (ierr) { + std::cout << "gen_velocity failed: " + << ps[0] << " " + << ps[1] << " " + << ps[2] << "\n"; + } } } else - ps = rmodel->gen_point(ierr); + std::tie(ps, ierr) = rmodel->gen_point(); if (ierr) count++; } while (ierr); if (ps[0] <= 0.0) negms++; - double RR=0.0; - for (int i=1; i<=3; i++) { - RR += ps[i]*ps[i]; - TT += 0.5*mass*ps[0]*ps[3+i]*ps[3+i]; - } - RR = sqrt(RR); - if (RR>=rmin) { - VC += -mass*ps[0]*rmodel->get_mass(RR)/RR; - WW += 0.5*mass*ps[0]*rmodel->get_pot(RR); - } + Eigen::Vector3d pos(ps[1], ps[2], ps[3]), pos1; + Eigen::Vector3d vel(ps[4], ps[5], ps[6]), vel1; + Eigen::Matrix3d proj, Iprj, rot = Eigen::Matrix3d::Identity(); + Eigen::Matrix3d projM, IprjM; + + double dq = 2.0*M_PI * Nfib / Nrepl, mfac = ps[0]; - if (zeropos or zerovel) { - ps[0] *= mass; - PS.push_back(ps); - zz[0] += ps[0]; - if (zeropos) for (int i=1; i<3; i++) zz[i] -= ps[0]*ps[i]; - if (zerovel) for (int i=4; i<7; i++) zz[i] -= ps[0]*ps[i]; + // Compute orbital plane basis + // + if (Nrepl>1) { + auto L = pos.cross(vel); // The angular momentum vector + if (pos.norm() < 1.0e-10 or L.norm() < 1.0e-10) { + proj = Iprj = Eigen::Matrix3d::Identity(); + } else { + auto X = pos/pos.norm(); // Radial unit vector, X. + auto Y = L.cross(X); // Choose a right-hand3d coordinate + Y = Y/Y.norm(); // system perpendicular to X and L. + auto Z = L/L.norm(); // Unit vector in the ang. mom. direction. 
+ + // Azimuth of vertical plane containing L + double Phi = atan2(Z(1), Z(0)) + 0.5*M_PI; + + // Tilt of plane w.r.t to polar axis + double Theta = acos(Z(2)); + + // Location of X in orbital plane w.r.t line of nodes + Eigen::Vector3d lon(cos(Phi), sin(Phi), 0.0); + double dotp = lon.dot(X); + double Psi = acos(dotp); + auto dir = lon.cross(X); + + proj.row(0) = X; // Project to orbital plane frame + proj.row(1) = Y; + proj.row(2) = Z; + + Iprj.col(0) = X; // Project from orbital plane to original + Iprj.col(1) = Y; + Iprj.col(2) = Z; + + if (Nfib>1) { + // Quadrant geometry + int sgn1 = 1, sgn2 = 1; + if (Theta > 0.5*M_PI) sgn1 = -1; + if (dir(2) < 0.0) sgn2 = -1; + + // Sanity check for debugging + if (true) { + double norm = 0.0; + if (sgn1*sgn2==1) + norm = (proj - return_euler(Phi, Theta, Psi, 0)).norm(); + else + norm = (proj - return_euler(Phi, Theta, -Psi, 0)).norm(); + + if (fabs(norm) > 1.0e-10) { + std::cout << "Theta=" << Theta + << " dir(2)=" << dir(2) << std::endl; + } + } + + if (sgn1*sgn2==1) { + projM = return_euler(Phi, -Theta, Psi, 0); + IprjM = return_euler(Phi, -Theta, Psi, 1); + } else { + projM = return_euler(Phi, -Theta, -Psi, 0); + IprjM = return_euler(Phi, -Theta, -Psi, 1); + } + } + } } - else { - out << std::setw(20) << mass * ps[0]; - for (int i=1; i<=6; i++) out << std::setw(20) << ps[i]+ps0[i]; - if (NI) { - for (int n=0; n=rmin) { + VC += -mass*ps[0]*rmodel->get_mass(RR)/RR; + WW += 0.5*mass*ps[0]*rmodel->get_pot(RR); } - out << std::endl; + if (zeropos or zerovel) { + ps[0] *= mass; + PS.push_back(ps); + zz[0] += ps[0]; + if (zeropos) for (int i=1; i<3; i++) zz[i] -= ps[0]*ps[i]; + if (zerovel) for (int i=4; i<7; i++) zz[i] -= ps[0]*ps[i]; + } + else { + out << std::setw(20) << mass * ps[0]; + for (int i=1; i<=6; i++) out << std::setw(20) << ps[i]+ps0[i]; + + if (NI) { + for (int n=0; n1) { + + Eigen::Matrix3d invt; + double Q = dq*(q/Nfib+1), phi=0.0, cost=0.0; + int nfib = q % Nfib; + + if (Nfib>1) { + phi = 2.0*M_PI * nfib / 
goldenRatio; + cost = 1.0 - 2.0 * nfib/Nfib; + cost = (cost < -1.0 ? -1.0 : (cost > 1.0 ? 1.0 : cost)); + invt = return_euler(phi, acos(cost), 0, 1); + } + + // Update rotation matrix + rot(0, 0) = rot(1, 1) = cos(Q); + rot(0, 1) = -sin(Q); + rot(1, 0) = sin(Q); + + // Full rotation matrix + Eigen::Matrix3d trans; + if (Nfib>1) trans = invt * rot * proj; + else trans = Iprj * rot * proj; + + // Rotate position and velocity + pos1 = trans * pos; + vel1 = -trans * vel; + + // Reset the phase-space vector + // + ps[0] = mfac; + + ps[1] = pos1(0); + ps[2] = pos1(1); + ps[3] = pos1(2); + + ps[4] = vel1(0); + ps[5] = vel1(1); + ps[6] = vel1(2); + } } - if (myid==0 and !((n+1)%NREPORT)) cout << '\r' << (n+1)*numprocs << flush; + if (myid==0 and !((n+1)%NREPORT)) cout << '\r' << (n+1)*numprocs*Nrepl + << flush; } if (zeropos or zerovel) { @@ -797,16 +949,20 @@ main(int argc, char **argv) MPI_Reduce(MPI_IN_PLACE, &WW, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(MPI_IN_PLACE, &VC, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - cout << std::endl << "-----------" << std::endl << std::endl; - cout << std::setw(20) << "States rejected: " << count << std::endl; - cout << std::setw(20) << "Negative masses: " << negms << std::endl; - cout << std::setw(60) << setfill('-') << '-' << std::endl << setfill(' '); - cout << std::setw(20) << "KE=" << TT << std::endl; - cout << std::setw(20) << "PE=" << WW << std::endl; - cout << std::setw(20) << "VC=" << VC << std::endl; - cout << std::setw(20) << "Ratio (-2T/W)=" << -2.0*TT/WW << std::endl; - cout << std::setw(20) << "Ratio (-2T/C)=" << -2.0*TT/VC << std::endl; - std::cout << std::setw(60) << std::setfill('-') << '-' << std::endl << std::setfill(' '); + std::cout << std::endl + << std::setw(60) << std::setfill('-') << '-' << std::endl + << std::setfill(' ') + << std::setw(20) << "States rejected: " << count << std::endl + << std::setw(20) << "Negative masses: " << negms << std::endl + << std::setw(60) << setfill('-') << '-' << 
std::endl + << setfill(' ') + << std::setw(20) << "KE=" << TT << std::endl + << std::setw(20) << "PE=" << WW << std::endl + << std::setw(20) << "VC=" << VC << std::endl + << std::setw(20) << "Ratio (-2T/W)=" << -2.0*TT/WW << std::endl + << std::setw(20) << "Ratio (-2T/C)=" << -2.0*TT/VC << std::endl + << std::setw(60) << std::setfill('-') << '-' << std::endl + << std::setfill(' '); } out.close(); diff --git a/utils/PhaseSpace/CMakeLists.txt b/utils/PhaseSpace/CMakeLists.txt index 63ced2c1f..809835df7 100644 --- a/utils/PhaseSpace/CMakeLists.txt +++ b/utils/PhaseSpace/CMakeLists.txt @@ -66,7 +66,7 @@ add_executable(psp2rings psp2rings.cc PSP.cc) add_executable(psp2bess psp2bess.cc PSP.cc Bess.cc) add_executable(psp2lagu psp2lagu.cc PSP.cc) add_executable(psp2interp psp2interp.cc PSP.cc) -add_executable(diffpsp diffpsp.cc MakeModel.cc PSP.cc) +add_executable(diffpsp diffpsp.cc MakeModel.cc PSP.cc KDE2d.cc) add_executable(pspmono pspmono.cc MakeModel.cc PSP.cc) add_executable(psp2hdf5 psp2hdf5.cc PSP.cc) add_executable(psporbv psporbv.cc PSP.cc) diff --git a/utils/PhaseSpace/KDE2d.H b/utils/PhaseSpace/KDE2d.H new file mode 100644 index 000000000..d8fa41f27 --- /dev/null +++ b/utils/PhaseSpace/KDE2d.H @@ -0,0 +1,119 @@ +#ifndef KDE2D_H +#define KDE2D_H + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace KDE +{ + /** + Class for 2D kernel density estimation + + Parameters + ---------- + @param numx: desired grid size in x + @param numy: desired grid size in y + @param xmin: minimum value in x + @param xmax: maximum value in x + @param ymin: minimum value in y + @param ymax: maximum value in y + @param sigmax: smoothing kernel width in x units + @param sigmay: smoothing kernel width in y units + + Data input + ---------- + Can be either a vector of pairs (x,y), an array of x and y + values, or a precomputed histogram-like grid of size (numx, + numy). 
+ + Output + ------ + Smoothed 2D grid in Eigen::MatrixXd format + */ + class KDE2d + { + protected: + + //@{ + //! Parameters + bool debug = false; // Set to true for intermediate output + const int minKsize = 7; // Default minimum kernel size + int numx, numy; + double xmin, xmax, ymin, ymax, sigmax, sigmay, delx, dely; + //@} + + //! Create a smooth 2D gaussian kernel + std::vector + gaussian_kernel_2d(int xsize, int ysize); + + //! Do the FFT convolution + void kde_fft_2d(); + + //@{ + //! Make 2D grid + void grid_pairs(const std::vector>& data); + void grid_array(const std::vector& x, const std::vector& y); + //@} + + //! Eigen matrices + Eigen::MatrixXd grid, smooth; + + public: + + //! Constructor + KDE2d(int numx, int numy, + double xmin, double xmax, + double ymin, double ymax, + double sigmax, double sigmay) : + numx(numx), numy(numy), xmin(xmin), xmax(xmax), + ymin(ymin), ymax(ymax), sigmax(sigmax), sigmay(sigmay) + { + delx = xmax - xmin; + dely = ymax - ymin; + if (delx<=0 || dely<=0) + throw std::invalid_argument("Grid size must be positive"); + } + + //! Get the smoothed density from an input grid + const Eigen::MatrixXd& operator()(const Eigen::MatrixXd& GRID) + { + if (GRID.rows() != numx || GRID.cols() != numy) + throw std::invalid_argument("Grid size must match KDE2d object"); + grid = GRID; + kde_fft_2d(); + return smooth; + } + + //! Get the smoothed density from an input data pairs + const Eigen::MatrixXd& operator()(const std::vector>& data) + { + grid_pairs(data); + kde_fft_2d(); + return smooth; + } + + //! Get the smoothed density from an input data arrays + const Eigen::MatrixXd& operator() + (const std::vector& x, const std::vector& y) + { + grid_array(x, y); + kde_fft_2d(); + return smooth; + } + + //! 
Set debug mode + void setDebug() { debug = true; } + }; +} + +#endif // KDE2D_H diff --git a/utils/PhaseSpace/KDE2d.cc b/utils/PhaseSpace/KDE2d.cc new file mode 100644 index 000000000..1218d4b1c --- /dev/null +++ b/utils/PhaseSpace/KDE2d.cc @@ -0,0 +1,262 @@ +#include + +namespace KDE +{ + // Function to compute the Gaussian kernel in pixel units + // + std::vector KDE2d::gaussian_kernel_2d(int xsize, int ysize) + { + std::vector kernel(xsize * ysize); + double sum = 0.0; + for (int i = -xsize / 2; i < xsize / 2 + 1; i++) { + + double dx = delx*i/numx; + double facx = exp(-0.5*dx*dx/(sigmax*sigmax)); + + for (int j = -ysize / 2; j < ysize / 2 + 1; j++) { + + double dy = dely*j/numy; + double facy = exp(-0.5*dy*dy/(sigmay*sigmay)); + + kernel[(i + xsize / 2) * ysize + (j + ysize / 2)] = facx * facy; + + sum += facx * facy; + } + } + + // Normalize the kernel + // + for (auto & v : kernel) v /= sum; + + return kernel; + } + + + void KDE2d::kde_fft_2d() + { + // Sanity check + // + if (numx != grid.rows() || numy != grid.cols()) + throw std::runtime_error("Invalid grid dimensions"); + + // Compute the Gaussian kernel + // + int kern_xsize = std::max(minKsize, + std::floor(sigmax/delx*numx*minKsize)); + + if (kern_xsize % 2 == 0) kern_xsize += 1; // Make the size odd + + int kern_ysize = std::max(minKsize, + std::floor(sigmay/dely*numy*minKsize)); + + if (kern_ysize % 2 == 0) kern_ysize += 1; // Make the size odd + + std::vector kern = gaussian_kernel_2d(kern_xsize, kern_ysize); + + if (debug) { + std::ofstream fout("kern0.dat"); + fout << kern_xsize << " " << kern_ysize << std::endl; + for (int j=0; j padded_grid(padded_xsize * padded_ysize, 0.0); + std::vector padded_kern(padded_xsize * padded_ysize, 0.0); + + for (int i=0; i>& data) + { + // Sanity check + // + if (data.size() == 0) + throw std::invalid_argument("data must be non-empty"); + + // Create a grid to evaluate KDE + // + grid.resize(numx, numy); + grid.setZero(); + for (const auto& point : data) { + int x 
= static_cast( (point.first - xmin)/delx * numx); + int y = static_cast( (point.second - ymin)/dely * numy); + + if (x >= 0 && x < numx && y >= 0 && y < numy) { + grid(x, y) += 1.0; + } + else { + std::cout << "Out of range: " << point.first << " " << point.second << std::endl; + } + } + } + + void + KDE2d::grid_array(const std::vector& X, const std::vector& Y) + { + // Sanity check + // + if (X.size() != Y.size()) + throw std::invalid_argument("x and y must be the same size"); + + if (X.size() == 0) + throw std::invalid_argument("data must be non-empty"); + + // Create a grid to evaluate KDE + // + grid.resize(numx, numy); + grid.setZero(); + for (int i=0; i= 0 && x < numx && y >= 0 && y < numy) { + grid(x, y) += 1.0; + } + else { + std::cout << "Out of range: " << X[i] << " " << Y[i] << std::endl; + } + } + } + +} +// END namespace KDE diff --git a/utils/PhaseSpace/diffpsp.cc b/utils/PhaseSpace/diffpsp.cc index 42890f9c6..c80e7d99a 100755 --- a/utils/PhaseSpace/diffpsp.cc +++ b/utils/PhaseSpace/diffpsp.cc @@ -47,6 +47,8 @@ #include +#include + #include #include #include @@ -54,6 +56,7 @@ #include // Enhanced option parsing #include // EXP library globals #include +#include "KDE2d.H" // Kernel density estimation void p_rec(std::ofstream& out, double E, double K, double V) { @@ -96,7 +99,8 @@ main(int argc, char **argv) double EMAX; double KMIN; double KMAX; - double I1min=-1, I1max=-1, I2min=-1, I2max=-1; + double Sig1, Sig2; + double I1min, I1max, I2min, I2max; int NLIMIT; int NSKIP; int NREPORT; @@ -151,6 +155,8 @@ main(int argc, char **argv) " OUTFILE.rp Pericentric radius [2d]\n" \ " OUTFILE.O1 Radial frequency [2d]\n" \ " OUTFILE.O2 Azimuthal frequency [2d]\n" \ + " OUTFILE.F1 Two dimensional DF info (1) [2d]\n" \ + " OUTFILE.F2 Two dimensional DF info (2) [2d]\n" \ " OUTFILE.DR Run mass, J, Delta J (R) [1d]\n" \ " OUTFILE.df1 One dimensional DF info (1) [1d]\n" \ " OUTFILE.df2 One dimensional DF info (2) [1d]\n" \ @@ -171,7 +177,7 @@ main(int argc, char 
**argv) ("jaco", "Compute phase-space Jacobian for DF computation") ("Emass", "Create energy bins approximately uniform in mass using potential from the mass model") ("actions", "Print output in action space rather than E-kappa space. The default is Energy-Kappa.") - ("F,filetype", "input file type (one of: PSPout, PSPspl, GadgetNative, GadgetHDF5)", + ("F,filetype", "input file type (one of: PSPout, PSPspl, PSPhdf5, GadgetNative, GadgetHDF5)", cxxopts::value(fileType)->default_value("PSPout")) ("I1min", "Minimum grid value for E (or I1 for actions)", cxxopts::value(I1min)) @@ -241,6 +247,10 @@ main(int argc, char **argv) cxxopts::value(DIVERGE_RFAC)->default_value("1.0")) ("Kpow", "Create kappa bins with power scaling", cxxopts::value(KPOWER)->default_value("1.0")) + ("Sig1", "Bin smoothing in Dimension 1 (must be > 0)", + cxxopts::value(Sig1)->default_value("0.0")) + ("Sig2", "Bin smoothing in Dimension 2 (must be > 0)", + cxxopts::value(Sig2)->default_value("0.0")) ("INFILE1", "Fiducial phase-space file", cxxopts::value>(INFILE1)) ("INFILE2", "Evolved phase-space file", @@ -321,8 +331,30 @@ main(int argc, char **argv) // Check for existence of files in INFILE1 and INFILE2 lists // unsigned bad = 0; - for (auto file : INFILE1) { - if (not std::filesystem::exists(std::filesystem::path(file))) bad++; + std::string path1, path2; + if (fileType == "PSPhdf5") { + path1 = CURDIR + INFILE1[0]; + std::filesystem::path dir_path = path1; + if (std::filesystem::is_directory(dir_path)) { + INFILE1.clear(); + try { + for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (std::filesystem::is_regular_file(entry)) { + std::string file = entry.path().string(); + if (H5::H5File::isHdf5(file)) INFILE1.push_back(file); + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "diffpsp error: " << e.what() << std::endl; + } + + if (INFILE1.size()==0) bad++; + } + } + else { + for (auto file : INFILE1) { + if (not 
std::filesystem::exists(std::filesystem::path(file))) bad++; + } } if (bad) { @@ -333,8 +365,29 @@ main(int argc, char **argv) exit(-1); } - for (auto file : INFILE2) { - if (not std::filesystem::exists(std::filesystem::path(file))) bad++; + if (fileType == "PSPhdf5") { + path2 = CURDIR + INFILE2[0]; + std::filesystem::path dir_path = path2; + if (std::filesystem::is_directory(dir_path)) { + INFILE2.clear(); + try { + for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (std::filesystem::is_regular_file(entry)) { + std::string file = entry.path().string(); + if (H5::H5File::isHdf5(file)) INFILE2.push_back(file); + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "diffpsp error: " << e.what() << std::endl; + } + + if (INFILE2.size()==0) bad++; + } + } + else { + for (auto file : INFILE2) { + if (not std::filesystem::exists(std::filesystem::path(file))) bad++; + } } if (bad) { @@ -405,7 +458,7 @@ main(int argc, char **argv) // Open output file // - const int nfiles = 19; + const int nfiles = 21; const char *suffix[] = { ".DM", // Mass per bin (E, K) #0 ".DE", // Delta E(E, K) #1 @@ -418,14 +471,16 @@ main(int argc, char **argv) ".DF", // Delta DF (E, K) #8 ".ra", // Apocentric radius (E, K) #9 ".rp", // Pericentric radius (E, K) #10 - ".O1", // Apocentric radius (E, K) #11 - ".O2", // Pericentric radius (E, K) #12 - ".Df", // Delta DF/F (E, K) #13 - ".DN", // Counts per (E, K) bin #14 - ".DR", // Run mass, J, Delta J (R) #15 - ".chk", // Orbital element check #16 - ".df1", // One dimensional DF info #17 - ".df2" // One dimensional DF info #18 + ".O1", // Radial frequency (E, K) #11 + ".O2", // Azimuthal frequency (E, K) #12 + ".F1", // DF for PS 1 (E, K) #13 + ".F2", // DF for PS 2 (E, K) #14 + ".Df", // Delta DF/F (E, K) #15 + ".DN", // Counts per (E, K) bin #16 + ".DR", // Run mass, J, Delta J (R) #17 + ".chk", // Orbital element check #18 + ".df1", // One dimensional DF info #19 + ".df2" // One dimensional DF 
info #20 }; std::vector filename(nfiles); for (int i=0; i(KTOL, KMIN); - if (I2max<0) I2max = std::min(1.0 - KTOL, KMAX); + if (vm.count("I1min")==0) I1min = Emin; + if (vm.count("I1max")==0) I1max = Emax; + if (vm.count("I2min")==0) I2min = std::max(KTOL, KMIN); + if (vm.count("I2max")==0) I2max = std::min(1.0 - KTOL, KMAX); + + if (myid==0) + std::cout << std::endl + << std::string(40, '-') << std::endl + << "---- Range limits" << std::endl + << std::string(40, '-') << std::endl + << "-- Emin: " << I1min << std::endl + << "-- Emax: " << I1max << std::endl + << "-- Kmin: " << I2min << std::endl + << "-- Kmax: " << I2max << std::endl + << std::string(40, '-') << std::endl; } double d1 = (I1max - I1min) / NUM1; @@ -562,15 +628,27 @@ main(int argc, char **argv) // double initl_time, final_time; + // Number of paths + // + int npath1 = 1, npath2 = 1; + if (fileType != "PSPhdf5") { + npath1 = INFILE1.size(); + npath2 = INFILE2.size(); + } // Iterate through file list // - for (size_t n=0; nCurrentTime(); @@ -578,7 +656,10 @@ main(int argc, char **argv) if (myid==0) { std::cout << std::endl << std::string(40, '-') << std::endl; - std::cout << "File 1: " << INFILE1[n] << std::endl; + if (fileType == "PSPhdf5") + std::cout << "Path 1: " << path1 << std::endl; + else + std::cout << "File 1: " << CURDIR + INFILE1[n] << std::endl; std::cout << "Found dump at time: " << initl_time << std::endl; } } @@ -593,14 +674,22 @@ main(int argc, char **argv) } try { - psp2 = PR::ParticleReader::createReader(fileType, {INFILE2[n]}, myid, true); + if (fileType == "PSPhdf5") { + std::string path = CURDIR + INFILE2[0]; + psp2 = PR::ParticleReader::createReader(fileType, INFILE2, myid, true); + } else { + psp2 = PR::ParticleReader::createReader(fileType, {INFILE2[n]}, myid, true); + } final_time = psp2->CurrentTime(); psp2->SelectType(COMP); if (myid==0) { - std::cout << "File 2: " << INFILE2[n] << endl; + if (fileType == "PSPhdf5") + std::cout << "Path 2: " << path2 << endl; + else + 
std::cout << "File 2: " << INFILE2[n] << endl; std::cout << "Found dump at time: " << final_time << std::endl; std::cout << std::string(40, '-') << std::endl; } @@ -1243,12 +1332,32 @@ main(int argc, char **argv) double mfac = d1*d2; if (meshgrid) - for (int k=0; k<15; k++) out[k] << std::setw(8) << NUM1 + for (int k=0; k<17; k++) out[k] << std::setw(8) << NUM1 << std::setw(8) << NUM2 << std::endl; bool relJ = false; if (vm.count("relJ")) relJ = true; + // Smooth arrays + // + if (Sig1 > 0.0 and Sig2 > 0.0) { + + KDE::KDE2d kde(NUM1, NUM2, + I1min, I1max, I2min, I2max, Sig1, Sig2); + + histoM = kde(histoM); + histoC = kde(histoC); + histoE = kde(histoE); + histoJ = kde(histoJ); + histoR = kde(histoR); + histoI = kde(histoI); + histoT = kde(histoT); + histoF = kde(histoF); + histo1 = kde(histo1); + histo2 = kde(histo2); + histoDF = kde(histoDF); + } + for (int j=0; j0.0) { - p_rec(out[8], I1, I2, histoF(i, j)*jfac/totMass); + p_rec(out[8 ], I1, I2, histoF(i, j)*jfac/totMass); + p_rec(out[13], I1, I2, histo1(i, j)*jfac/totMass); + p_rec(out[14], I1, I2, histo2(i, j)*jfac/totMass); } else { - p_rec(out[8], I1, I2, 0.0); + p_rec(out[8 ], I1, I2, 0.0); + p_rec(out[13], I1, I2, 0.0); + p_rec(out[14], I1, I2, 0.0); } - p_rec(out[13], I1, I2, histoDF(i, j)); - p_rec(out[14], I1, I2, histoC (i, j)); + p_rec(out[15], I1, I2, histoDF(i, j)); + p_rec(out[16], I1, I2, histoC (i, j)); } - if (not meshgrid) for (int k=0; k<15; k++) out[k] << endl; + if (not meshgrid) for (int k=0; k<17; k++) out[k] << endl; } if (CUMULATE) { @@ -1357,36 +1470,36 @@ main(int argc, char **argv) }; for (int j=0; j #include +#include +#include #include #include #include #include #include +#include + #include #include #include @@ -75,7 +78,7 @@ main(int argc, char **argv) options.add_options() ("h,help", "This help message") - ("F,filetype", "input file type (one of: PSPout, PSPspl, GadgetNative, GadgetHDF5)", + ("F,filetype", "input file type (one of: PSPout, PSPspl, GadgetNative, GadgetHDF5, 
PSPhdf5)", cxxopts::value(fileType)->default_value("PSPout")) ("RMIN", "Minimum model radius", cxxopts::value(RMIN)->default_value("0.0")) @@ -138,23 +141,52 @@ main(int argc, char **argv) // Per file weight // double weight = 1.0/INFILE.size(); + int nfiles = INFILE.size(); + + std::string path; + + if (fileType=="PSPhdf5") { + path = CURDIR + INFILE[0]; + std::filesystem::path dir_path = path; + if (std::filesystem::is_directory(dir_path)) { + INFILE.clear(); + try { + for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (std::filesystem::is_regular_file(entry)) { + std::string file = entry.path().string(); + if (H5::H5File::isHdf5(file)) INFILE.push_back(file); + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "pspmono error: " << e.what() << std::endl; + } + } + + nfiles = 1; + } // Loop through files // - for (size_t n=0; nSelectType(COMP); time = psp->CurrentTime(); if (myid==0) { - std::cout << "File: " << INFILE[n] << std::endl; + if (fileType=="PSPhdf5") + std::cout << "Path: " << path << std::endl; + else + std::cout << "File: " << INFILE[n] << std::endl; std::cout << "Found dump at time: " << time << std::endl; } }