From 0269572a7f633c4c974b14b2642659c933121588 Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 29 Jul 2024 09:56:03 -0400 Subject: [PATCH 001/124] patch for mpich send buffr --- src/framework/domain/metadomain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index b13475fd6..26a4f3168 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -390,7 +390,7 @@ namespace ntt { #if defined(MPI_ENABLED) auto dx_mins = std::vector(g_ndomains); dx_mins[g_mpi_rank] = dx_min; - MPI_Allgather(&dx_mins[g_mpi_rank], + MPI_Allgather(&dx_min, 1, mpi::get_type(), dx_mins.data(), From 35ebe5328b2aa7df5fe95f0f6e9927b0769411b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 Aug 2024 14:54:47 -0500 Subject: [PATCH 002/124] initial commit: modification to shock pgen to run magnetized shocks --- setups/srpic/shock/pgen.hpp | 57 ++++++++++++++++++++++++++++++++--- setups/srpic/shock/shock.toml | 4 +++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index f07b99878..4a9cc3f09 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -14,6 +14,47 @@ namespace user { using namespace ntt; + template + struct InitFields + { + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t bbeta) : + Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Bbeta { bbeta } {} + + // alternative: initialize magnetisation from simulation parameters as in Tristan? 
+ // Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); + + // magnetic field components + Inline auto bx1(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::cos(Btheta / 180.0 * Kokkos::numbers::pi); + } + Inline auto bx2(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + } + Inline auto bx3(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + } + + // electric field components + Inline auto ex1(const coord_t &x_Ph) const -> real_t + { + return ZERO; + } + Inline auto ex2(const coord_t &x_Ph) const -> real_t + { + return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + } + Inline auto ex3(const coord_t &x_Ph) const -> real_t + { + return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + } + + private: + const real_t Btheta, Bphi, Bbeta, Bmag; + }; + template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -30,10 +71,18 @@ namespace user { const real_t drift_ux, temperature; - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , drift_ux { p.template get("setup.drift_ux") } - , temperature { p.template get("setup.temperature") } {} + const real_t Btheta, Bphi, Bbeta, Bmag; + InitFields init_flds; + + inline PGen(const SimulationParams &p, const Metadomain &m) + : arch::ProblemGenerator { p } + , drift_ux { p.template get("setup.drift_ux") } + , temperature { p.template get("setup.temperature") } + , Bmag { p.template get("setup.Bmag", 0.0) } + , Btheta { p.template get("setup.Btheta", 0.0) } + , Bphi { p.template get("setup.Bphi", 0.0) } + , Bbeta { p.template get("setup.Bbeta", 0.0) } + , init_flds { Bmag, Btheta, Bphi, Bbeta } 
{} inline PGen() {} diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..90571631e 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -42,6 +42,10 @@ [setup] drift_ux = 0.1 temperature = 1e-3 + Bmag = 0.0 + Btheta = 0.0 + Bphi = 0.0 + Bbeta = 0.0 [output] interval_time = 0.1 From 1920f4415ec6d78c5f880a2afd73a2deac805213 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Wed, 21 Aug 2024 20:15:11 -0400 Subject: [PATCH 003/124] Add bulk velocity as moment output. --- src/framework/domain/output.cpp | 8 ++++ src/global/enums.h | 5 ++- src/global/tests/enums.cpp | 2 +- src/kernels/particle_moments.hpp | 65 +++++++++++++++++++++++++++++++- src/output/fields.cpp | 3 ++ src/output/fields.h | 4 +- 6 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index be154ce16..39c44192d 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -294,6 +294,14 @@ namespace ntt { {}, local_domain->fields.bckp, c); + } else if (fld.id() == FldsID::V) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[0], + local_domain->fields.bckp, + c); } else { raise::Error("Wrong moment requested for output", HERE); } diff --git a/src/global/enums.h b/src/global/enums.h index 57822dec4..f7b1278c6 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -289,16 +289,17 @@ namespace ntt { N = 12, Nppc = 13, Custom = 14, + V = 15, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc, Custom }; + A, T, Rho, Charge, N, Nppc, Custom , V}; static constexpr const char* lookup[] = { "e", "dive", "d", "divd", "b", "h", "j", "a", "t", "rho", "charge", "n", - "nppc", "custom" }; + "nppc", "custom", "v" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff 
--git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 1fc57398f..8f814e9df 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -68,7 +68,7 @@ auto main() -> int { enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", "h", "j", "a", "t", "rho", - "charge", "n", "nppc", "custom" }; + "charge", "n", "nppc", "custom" , "v"}; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 8b668a036..83caea563 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -41,7 +41,7 @@ namespace kernel { static constexpr auto D = M::Dim; static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || - (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T), + (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T) || (F == FldsID::V), "Invalid field ID"); const unsigned short c1, c2; @@ -89,7 +89,7 @@ namespace kernel { std::size_t ni2, real_t inv_n0, unsigned short window) - : c1 { (components.size() == 2) ? components[0] + : c1 { (components.size() > 0) ? components[0] : static_cast(0) } , c2 { (components.size() == 2) ? 
components[1] : static_cast(0) } @@ -205,6 +205,67 @@ namespace kernel { coeff = contrib; } + if constexpr (F == FldsID::V) { + real_t gamma { ZERO }; + // for stress-energy tensor + vec_t u_Phys { ZERO }; + if constexpr (S == SimEngine::SRPIC) { + // SR + // stress-energy tensor for SR is computed in the tetrad (hatted) basis + if constexpr (M::CoordType == Coord::Cart) { + u_Phys[0] = ux1(p); + u_Phys[1] = ux2(p); + u_Phys[2] = ux3(p); + } else { + static_assert(D != Dim::_1D, "non-Cartesian SRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } else { + x_Code[2] = phi(p); + } + metric.template transform_xyz( + x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Phys); + } + if (mass == ZERO) { + gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); + } else { + gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); + } + } else { + // GR + // stress-energy tensor for GR is computed in contravariant basis + static_assert(D != Dim::_1D, "GRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } + vec_t u_Cntrv { ZERO }; + // compute u_i u^i for energy + metric.template transform(x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Cntrv); + gamma = u_Cntrv[0] * ux1(p) + u_Cntrv[1] * ux2(p) + u_Cntrv[2] * ux3(p); + if (mass == ZERO) { + gamma = math::sqrt(gamma); + } else { + gamma = math::sqrt(ONE + gamma); + } + metric.template transform(x_Code, u_Cntrv, u_Phys); + } + // compute the corresponding moment + coeff = u_Phys[c1 - 1] / gamma; + } else { + // for other cases, use the `contrib` defined above + coeff = contrib; + } + if constexpr (F != FldsID::Nppc) { // for nppc calculation ... // ... 
do not take volume, weights or smoothing into account diff --git a/src/output/fields.cpp b/src/output/fields.cpp index aa5a752d4..0c2ea5e50 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -44,6 +44,9 @@ namespace out { } else if (id() == FldsID::T) { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); + } else if (id() == FldsID::V) { + // energy-momentum tensor + comp = InterpretComponents({ name.substr(1, 1) }); } else { // scalar (Rho, divE, Custom, etc.) comp = {}; diff --git a/src/output/fields.h b/src/output/fields.h index a520a246d..4fde18ed2 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -43,7 +43,7 @@ namespace out { [[nodiscard]] auto is_moment() const -> bool { return (id() == FldsID::T || id() == FldsID::Rho || id() == FldsID::Nppc || - id() == FldsID::N || id() == FldsID::Charge); + id() == FldsID::N || id() == FldsID::Charge || id() == FldsID::V); } [[nodiscard]] @@ -94,6 +94,8 @@ namespace out { tmp += m_name.substr(1, 2); } else if (id() == FldsID::A) { tmp += "3"; + } else if (id() == FldsID::V) { + tmp += m_name.substr(1, 1); } else if (is_field()) { tmp += "i"; } From bdd96be69727ba0b39ec02b4469a7dca56ed6f40 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Mon, 26 Aug 2024 11:46:57 -0400 Subject: [PATCH 004/124] Update input. 
--- input.example.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/input.example.toml b/input.example.toml index 88589495c..1c4e0466d 100644 --- a/input.example.toml +++ b/input.example.toml @@ -329,7 +329,7 @@ # Field quantities to output: # @type: array of strings # @valid: fields: "E", "B", "J", "divE" - # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij" + # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij", "Vi" # @valid: for GR: "D", "H", "divD", "A" # @default: [] # @note: For T, you can use unspecified indices, e.g., Tij, T0i, or specific ones, e.g., Ttt, T00, T02, T23 From 51323608d56e7a2cb594d4d921c40f03309e63ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 5 Sep 2024 15:34:22 -0500 Subject: [PATCH 005/124] fix misunderstanding in setup --- setups/srpic/shock/pgen.hpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 4a9cc3f09..c3771cde2 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -17,8 +17,8 @@ namespace user { template struct InitFields { - InitFields(real_t bmag, real_t btheta, real_t bphi, real_t bbeta) : - Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Bbeta { bbeta } {} + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : + Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} // alternative: initialize magnetisation from simulation parameters as in Tristan? 
// Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); @@ -44,15 +44,15 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { - return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } private: - const real_t Btheta, Bphi, Bbeta, Bmag; + const real_t Btheta, Bphi, Vx, Bmag; }; template @@ -71,7 +71,7 @@ namespace user { const real_t drift_ux, temperature; - const real_t Btheta, Bphi, Bbeta, Bmag; + const real_t Btheta, Bphi, Bmag; InitFields init_flds; inline PGen(const SimulationParams &p, const Metadomain &m) @@ -81,8 +81,7 @@ namespace user { , Bmag { p.template get("setup.Bmag", 0.0) } , Btheta { p.template get("setup.Btheta", 0.0) } , Bphi { p.template get("setup.Bphi", 0.0) } - , Bbeta { p.template get("setup.Bbeta", 0.0) } - , init_flds { Bmag, Btheta, Bphi, Bbeta } {} + , init_flds { Bmag, Btheta, Bphi, drift_ux } {} inline PGen() {} From cdfd2859c1c5bf163478ac2f48a5ca90916611b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 11 Sep 2024 13:26:00 -0500 Subject: [PATCH 006/124] fix sign error --- setups/srpic/shock/pgen.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index c3771cde2..1194e7fed 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -20,9 +20,6 @@ namespace user { InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} - // alternative: 
initialize magnetisation from simulation parameters as in Tristan? - // Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); - // magnetic field components Inline auto bx1(const coord_t &x_Ph) const -> real_t { @@ -44,7 +41,7 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { From e24078cadd4096d57e75c1dd14c9e92cc96f60aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 11 Sep 2024 14:07:43 -0500 Subject: [PATCH 007/124] fix sign error --- setups/srpic/shock/pgen.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index c3771cde2..1194e7fed 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -20,9 +20,6 @@ namespace user { InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} - // alternative: initialize magnetisation from simulation parameters as in Tristan? 
- // Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); - // magnetic field components Inline auto bx1(const coord_t &x_Ph) const -> real_t { @@ -44,7 +41,7 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { From 0fc366c76db99099c6eb98ee903abc47e0cf711c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 11 Sep 2024 14:13:54 -0500 Subject: [PATCH 008/124] Added comment for `InitFields` --- setups/srpic/shock/pgen.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 1194e7fed..715c222df 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -16,7 +16,16 @@ namespace user { template struct InitFields - { + { + /* + Sets up magnetic and electric field components for the simulation. + Must satisfy E = -v x B for Lorentz Force to be zero. 
+ + @param bmag: magnetic field scaling + @param btheta: magnetic field polar angle + @param bphi: magnetic field azimuthal angle + @param drift_ux: drift velocity in the x direction + */ InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} From c3b1018069bba7ec582b1d612d3d4c8b9b48ed5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 23 Sep 2024 11:25:57 -0500 Subject: [PATCH 009/124] fix signs (again) --- setups/srpic/shock/pgen.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 715c222df..1fdd18faa 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -50,11 +50,11 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } private: From d666d1ea0df93e38d048257a9fc48e7a24407504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 23 Sep 2024 11:37:27 -0500 Subject: [PATCH 010/124] removed redundant parameter and added comments --- setups/srpic/shock/shock.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index 90571631e..e475ae097 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -42,10 +42,9 @@ [setup] drift_ux = 0.1 temperature = 1e-3 - Bmag = 0.0 - Btheta = 0.0 - 
Bphi = 0.0 - Bbeta = 0.0 + Bmag = 0.0 # set to 1.0 if magnetized shock is required + Btheta = 0.0 # magnetic field polar angle + Bphi = 0.0 # magnetic field azimuthal angle [output] interval_time = 0.1 From 0b65f557e56cbf2598c131f87cc8c8d9212179be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 2 Oct 2024 10:10:44 -0500 Subject: [PATCH 011/124] added atmosphere bc to enforce initial magnetic field config at the boundaries --- setups/srpic/shock/pgen.hpp | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 1fdd18faa..999a7b608 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -57,9 +57,25 @@ namespace user { return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } - private: - const real_t Btheta, Bphi, Vx, Bmag; - }; + private: + const real_t Btheta, Bphi, Vx, Bmag; + }; + + template + struct DriveFields : public InitFields { + DriveFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : + InitFields {bmag, btheta, bphi, drift_ux} {} + + /* Enforce resetting magnetic and electric field at the boundary + This avoids weird */ + using InitFields::bx1; + using InitFields::bx2; + using InitFields::bx3; + + using InitFields::ex1; + using InitFields::ex2; + using InitFields::ex3; + }; template struct PGen : public arch::ProblemGenerator { @@ -91,6 +107,14 @@ namespace user { inline PGen() {} + auto FieldDriver(real_t time) const -> DriveFields { + const real_t bmag = Bmag; + const real_t btheta = Btheta; + const real_t bphi = Bphi; + const real_t ux = drift_ux; + return DriveFields{bmag, btheta, bphi, ux}; + } + inline void InitPrtls(Domain& local_domain) { const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, From cbeca51456c0717e08cfb00106d053db7a79fe15 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 
15 Oct 2024 23:19:47 -0400 Subject: [PATCH 012/124] proper output readwrite test --- src/output/tests/writer-nompi.cpp | 170 +++++++++++++++++++----------- 1 file changed, 107 insertions(+), 63 deletions(-) diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 25a9a2c51..c087d2895 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -16,6 +16,8 @@ #include #include +using namespace ntt; + void cleanup() { namespace fs = std::filesystem; fs::path tempfile_path { "test.h5" }; @@ -26,86 +28,128 @@ auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); try { - adios2::ADIOS adios; + constexpr auto nx1 = 10; + constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; + constexpr auto nx2 = 10; + constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; + constexpr auto nx3 = 10; + constexpr auto nx3_gh = nx3 + 2 * N_GHOSTS; + constexpr auto i1min = N_GHOSTS; + constexpr auto i2min = N_GHOSTS; + constexpr auto i3min = N_GHOSTS; + constexpr auto i1max = nx1 + N_GHOSTS; + constexpr auto i2max = nx2 + N_GHOSTS; + constexpr auto i3max = nx3 + N_GHOSTS; + + ndfield_t field { "fld", nx1_gh, nx2_gh, nx3_gh }; + std::vector field_names; - using namespace ntt; - auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test"); - writer.defineMeshLayout({ 10, 10, 10 }, - { 0, 0, 0 }, - { 10, 10, 10 }, - false, - Coord::Cart); - writer.defineFieldOutputs(SimEngine::SRPIC, { "E", "B", "Rho_1_3", "N_2" }); - - ndfield_t field { "fld", - 10 + 2 * N_GHOSTS, - 10 + 2 * N_GHOSTS, - 10 + 2 * N_GHOSTS }; - Kokkos::parallel_for( - "fill", - CreateRangePolicy({ N_GHOSTS, N_GHOSTS, N_GHOSTS }, - { 10 + N_GHOSTS, 10 + N_GHOSTS, 10 + N_GHOSTS }), - Lambda(index_t i1, index_t i2, index_t i3) { - field(i1, i2, i3, 0) = i1 + i2 + i3; - field(i1, i2, i3, 1) = i1 * i2 / i3; - field(i1, i2, i3, 2) = i1 / i2 * i3; - }); - std::vector names; - std::vector addresses; - for (auto i = 0; i < 3; ++i) { - 
names.push_back(writer.fieldWriters()[0].name(i)); - addresses.push_back(i); + { + // fill data + Kokkos::parallel_for( + "fill", + CreateRangePolicy({ i1min, i2min, i3min }, + { i1max, i2max, i3max }), + Lambda(index_t i1, index_t i2, index_t i3) { + field(i1, i2, i3, 0) = i1 + i2 + i3; + field(i1, i2, i3, 1) = i1 * i2 / i3; + field(i1, i2, i3, 2) = i1 / i2 * i3; + }); } - writer.beginWriting(0, 0.0); - writer.writeField(names, field, addresses); - writer.endWriting(); - writer.beginWriting(1, 0.1); - writer.writeField(names, field, addresses); - writer.endWriting(); + adios2::ADIOS adios; + + { + // write + auto writer = out::Writer(); + writer.init(&adios, "hdf5", "test"); + writer.defineMeshLayout({ nx1, nx2, nx3 }, + { 0, 0, 0 }, + { nx1, nx3, nx3 }, + false, + Coord::Cart); + writer.defineFieldOutputs(SimEngine::SRPIC, { "E", "B", "Rho_1_3", "N_2" }); + + std::vector addresses; + for (auto i = 0; i < 3; ++i) { + field_names.push_back(writer.fieldWriters()[0].name(i)); + addresses.push_back(i); + } + writer.beginWriting(10, 123.0); + writer.writeField(field_names, field, addresses); + writer.endWriting(); + + writer.beginWriting(20, 123.4); + writer.writeField(field_names, field, addresses); + writer.endWriting(); + } { // read - adios2::ADIOS adios; - adios2::IO io = adios.DeclareIO("read-test"); + adios2::IO io = adios.DeclareIO("read-test"); io.SetEngine("hdf5"); adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read); - std::size_t step { 0 }; - long double time { 0.0 }; - reader.Get(io.InquireVariable("Step"), step); - reader.Get(io.InquireVariable("Time"), time); - raise::ErrorIf(step != 0, "Step is not 0", HERE); - raise::ErrorIf(time != 0.0, "Time is not 0.0", HERE); + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 3, + "Dimension is not correct", + HERE); for (std::size_t step = 0; reader.BeginStep() == adios2::StepStatus::OK; 
++step) { - std::size_t step_read; - adios2::Variable stepVar = io.InquireVariable( - "Step"); - reader.Get(stepVar, step_read); - + std::size_t step_read; long double time_read; + + reader.Get(io.InquireVariable("Step"), step_read); reader.Get(io.InquireVariable("Time"), time_read); - raise::ErrorIf(step_read != step, "Step is not correct", HERE); - raise::ErrorIf((float)time_read != (float)step / 10.0f, + raise::ErrorIf(step_read != (step + 1) * 10, "Step is not correct", HERE); + raise::ErrorIf((float)time_read != 123 + (float)step * 0.4f, "Time is not correct", HERE); - for (const auto& name : names) { - auto data = io.InquireVariable(name); - raise::ErrorIf(data.Shape().size() != 3, - fmt::format("%s is not 3D", name.c_str()), - HERE); - - auto dims = data.Shape(); - std::size_t nx1 = dims[0]; - std::size_t nx2 = dims[1]; - std::size_t nx3 = dims[2]; - raise::ErrorIf((nx1 != 10) || (nx2 != 10) || (nx3 != 10), - fmt::format("%s is not 10x10x10", name.c_str()), - HERE); + array_t field_read { "fld_read", nx1, nx2, nx3 }; + auto field_read_h = Kokkos::create_mirror_view(field_read); + + int cntr = 0; + for (const auto& name : field_names) { + auto fieldVar = io.InquireVariable(name); + if (fieldVar) { + raise::ErrorIf(fieldVar.Shape().size() != 3, + fmt::format("%s is not 3D", name.c_str()), + HERE); + + auto dims = fieldVar.Shape(); + std::size_t nx1_r = dims[0]; + std::size_t nx2_r = dims[1]; + std::size_t nx3_r = dims[2]; + raise::ErrorIf( + (nx1_r != 10) || (nx2_r != 10) || (nx3_r != 10), + fmt::format("%s is not %dx%dx%d", name.c_str(), nx1_r, nx2_r, nx3_r), + HERE); + fieldVar.SetSelection( + adios2::Box({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r })); + reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(field_read, field_read_h); + + Kokkos::parallel_for( + "check", + CreateRangePolicy({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r }), + Lambda(index_t i1, index_t i2, index_t i3) { + if (not cmp::AlmostEqual( + field_read(i1, i2, i3), + field(i1 
+ i1min, i2 + i2min, i3 + i3min, cntr))) { + printf("%e %e\n", + field_read(i1, i2, i3), + field(i1 + i1min, i2 + i2min, i3 + i3min, cntr)); + raise::KernelError(HERE, "Field is not read correctly"); + } + }); + ++cntr; + } else { + raise::Error("Field not found", HERE); + } } reader.EndStep(); } From de597cfa3d1745fa488bab56a9da0c2af5ce11d0 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 15 Oct 2024 23:20:13 -0400 Subject: [PATCH 013/124] TODO marked --- src/output/tests/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index d33cc6c54..37af95fac 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -4,8 +4,6 @@ # - kokkos [required] # - mpi [optional] # - adios2 [optional] -# !TODO: -# - add more proper write tests for ADIOS2 # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -27,4 +25,4 @@ if (NOT ${mpi}) gen_test(writer-nompi) else() gen_test(writer-mpi) -endif() \ No newline at end of file +endif() From f91ca40039eced9db228964c50cac8af8e0e965e Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 00:10:56 -0400 Subject: [PATCH 014/124] added downsampling input param --- input.example.toml | 9 +++++---- src/framework/parameters.cpp | 16 ++++++++++------ src/framework/tests/parameters.cpp | 24 ++++++++++++++++++------ src/global/defaults.h | 1 - 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/input.example.toml b/input.example.toml index 06225024a..5ee34d65d 100644 --- a/input.example.toml +++ b/input.example.toml @@ -340,10 +340,6 @@ # @type: array of strings # @default: [] custom = "" - # @NOT_IMPLEMENTED: Stride for the output of fields: - # @type: unsigned short: > 1 - # @default: 1 - stride = "" # Smoothing window for the output of moments (e.g., "Rho", "Charge", "T", etc.): # @type: unsigned short # @default: 0 @@ -357,6 +353,11 @@ # @default: -1.0 (use 
`output.interval_time`) # @note: When `interval_time` < 0, the output is controlled by `interval`, otherwise by `interval_time` interval_time = "" + # Downsample factor for the output of fields: + # @type: array of unsigned int >= 1 + # @default: [1, 1, 1] + # @note: The output is downsampled by the given factors in each direction + downsampling = "" [output.particles] # Toggle for the particles output: diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 1d4672212..3e2f1f9d7 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -494,12 +494,16 @@ namespace ntt { "fields", "mom_smooth", defaults::output::mom_smooth)); - set("output.fields.stride", - toml::find_or(toml_data, - "output", - "fields", - "stride", - defaults::output::flds_stride)); + auto field_dwn = toml::find_or(toml_data, + "output", + "fields", + "downsampling", + std::vector { 1, 1, 1 }); + raise::ErrorIf(field_dwn.size() > 3, "invalid `output.fields.downsampling`", HERE); + if (field_dwn.size() > dim) { + field_dwn.erase(field_dwn.begin() + (std::size_t)(dim), field_dwn.end()); + } + set("output.fields.downsampling", field_dwn); // particles const auto prtl_out = toml::find_or(toml_data, diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 8d30355b9..393cd2409 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -73,13 +73,18 @@ const auto mink_1d = u8R"( mystr = "hi" [output] - fields = ["Rho", "J", "B"] - particles = ["X", "U"] format = "hdf5" - mom_smooth = 2 - fields_stride = 1 - prtl_stride = 100 - interval_time = 0.01 + + [output.fields] + quantities = ["Rho", "J", "B"] + mom_smooth = 2 + downsampling = [4, 5] + interval = 100 + + [output.particles] + species = [1, 2] + stride = 100 + interval_time = 0.01 )"_toml; const auto sph_2d = u8R"( @@ -315,6 +320,13 @@ auto main(int argc, char* argv[]) -> int { assert_equal(params_mink_1d.get("setup.mystr"), "hi", 
"setup.mystr"); + + const auto output_stride = params_mink_1d.get>( + "output.fields.downsampling"); + assert_equal(output_stride.size(), + 1, + "output.fields.downsampling.size()"); + assert_equal(output_stride[0], 4, "output.fields.downsampling[0]"); } { diff --git a/src/global/defaults.h b/src/global/defaults.h index ee9a65af5..be92acbf9 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -51,7 +51,6 @@ namespace ntt::defaults { const std::string format = "hdf5"; const std::size_t interval = 100; const unsigned short mom_smooth = 0; - const unsigned short flds_stride = 1; const std::size_t prtl_stride = 100; const real_t spec_emin = 1e-3; const real_t spec_emax = 1e3; From 97cccfaf260568e4aa5945f76e12589211625368 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:05:50 -0400 Subject: [PATCH 015/124] added field downsampling + test --- extern/adios2 | 2 +- src/checkpoint/tests/checkpoint-nompi.cpp | 4 +- src/framework/domain/output.cpp | 53 ++++-- src/framework/parameters.cpp | 19 +- src/output/tests/writer-nompi.cpp | 66 +++++-- src/output/writer.cpp | 213 ++++++++++++++++------ src/output/writer.h | 26 ++- 7 files changed, 275 insertions(+), 108 deletions(-) diff --git a/extern/adios2 b/extern/adios2 index e524dce1b..a6e8314cc 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit e524dce1b72ccf75422cea6342ee2d64a6a87964 +Subproject commit a6e8314cc3c0b28d496b44dcd4f15685013b887b diff --git a/src/checkpoint/tests/checkpoint-nompi.cpp b/src/checkpoint/tests/checkpoint-nompi.cpp index 8f7a522fd..23dbd8871 100644 --- a/src/checkpoint/tests/checkpoint-nompi.cpp +++ b/src/checkpoint/tests/checkpoint-nompi.cpp @@ -29,9 +29,9 @@ auto main(int argc, char* argv[]) -> int { try { constexpr auto nx1 = 10; constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; - constexpr auto nx2 = 10; + constexpr auto nx2 = 13; constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; - constexpr auto nx3 = 10; + constexpr auto nx3 = 9; constexpr auto nx3_gh = nx3 
+ 2 * N_GHOSTS; constexpr auto i1min = N_GHOSTS; constexpr auto i2min = N_GHOSTS; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 0918eb2d3..46ca95ede 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -69,6 +69,8 @@ namespace ntt { g_writer.defineMeshLayout(glob_shape_with_ghosts, off_ncells_with_ghosts, loc_shape_with_ghosts, + params.template get>( + "output.fields.downsampling"), incl_ghosts, M::CoordType); const auto fields_to_write = params.template get>( @@ -216,37 +218,50 @@ namespace ntt { g_writer.beginWriting(current_step, current_time); if (write_fields) { const auto incl_ghosts = params.template get("output.debug.ghosts"); + const auto dwn = params.template get>( + "output.fields.downsampling"); for (unsigned short dim = 0; dim < M::Dim; ++dim) { - const auto is_last = local_domain->offset_ncells()[dim] + - local_domain->mesh.n_active()[dim] == - mesh().n_active()[dim]; - array_t xc { "Xc", - local_domain->mesh.n_active()[dim] + - (incl_ghosts ? 2 * N_GHOSTS : 0) }; - array_t xe { "Xe", - local_domain->mesh.n_active()[dim] + - (incl_ghosts ? 2 * N_GHOSTS : 0) + - (is_last ? 1 : 0) }; - const auto offset = (incl_ghosts ? N_GHOSTS : 0); - const auto ncells = local_domain->mesh.n_active()[dim]; - const auto& metric = local_domain->mesh.metric; + const auto l_size = local_domain->mesh.n_active()[dim]; + const auto l_offset = local_domain->offset_ncells()[dim]; + const auto g_size = mesh().n_active()[dim]; + + const auto dwn_in_dim = dwn[dim]; + const auto l_size_dwn = static_cast(l_size / dwn_in_dim); + + const auto is_last = l_offset + l_size == g_size; + + const auto add_ghost = (incl_ghosts ? 2 * N_GHOSTS : 0); + const auto add_last = (is_last ? 1 : 0); + + array_t xc { "Xc", l_size_dwn + add_ghost }; + array_t xe { "Xe", l_size_dwn + add_ghost + add_last }; + + const auto offset = (incl_ghosts ? 
N_GHOSTS : 0); + const auto ncells = l_size_dwn; + const auto first_cell = static_cast(l_offset / dwn_in_dim) * + dwn_in_dim - + l_offset; + + const auto& metric = local_domain->mesh.metric; + Kokkos::parallel_for( "GenerateMesh", ncells, - Lambda(index_t i) { + Lambda(index_t i_dwn) { + const auto i = first_cell + i_dwn * dwn_in_dim; const auto i_ = static_cast(i); coord_t x_Cd { ZERO }, x_Ph { ZERO }; x_Cd[dim] = i_ + HALF; metric.template convert(x_Cd, x_Ph); - xc(offset + i) = x_Ph[dim]; - x_Cd[dim] = i_; + xc(offset + i_dwn) = x_Ph[dim]; + x_Cd[dim] = i_; metric.template convert(x_Cd, x_Ph); - xe(offset + i) = x_Ph[dim]; - if (is_last && i == ncells - 1) { + xe(offset + i_dwn) = x_Ph[dim]; + if (is_last && i_dwn == ncells - 1) { x_Cd[dim] = i_ + ONE; metric.template convert(x_Cd, x_Ph); - xe(offset + i + 1) = x_Ph[dim]; + xe(offset + i_dwn + 1) = x_Ph[dim]; } }); g_writer.writeMesh(dim, xc, xe); diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 3e2f1f9d7..b667b5ac9 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -503,6 +503,9 @@ namespace ntt { if (field_dwn.size() > dim) { field_dwn.erase(field_dwn.begin() + (std::size_t)(dim), field_dwn.end()); } + for (const auto& dwn : field_dwn) { + raise::ErrorIf(dwn == 0, "downsampling factor must be nonzero", HERE); + } set("output.fields.downsampling", field_dwn); // particles @@ -565,8 +568,20 @@ namespace ntt { /* [output.debug] ------------------------------------------------------- */ set("output.debug.as_is", toml::find_or(toml_data, "output", "debug", "as_is", false)); - set("output.debug.ghosts", - toml::find_or(toml_data, "output", "debug", "ghosts", false)); + const auto output_ghosts = toml::find_or(toml_data, + "output", + "debug", + "ghosts", + false); + set("output.debug.ghosts", output_ghosts); + if (output_ghosts) { + for (const auto& dwn : field_dwn) { + raise::ErrorIf( + dwn != 1, + "full resolution required when outputting with ghost 
cells", + HERE); + } + } /* [checkpoint] --------------------------------------------------------- */ set("checkpoint.interval", diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index c087d2895..4c032094b 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -30,9 +30,9 @@ auto main(int argc, char* argv[]) -> int { try { constexpr auto nx1 = 10; constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; - constexpr auto nx2 = 10; + constexpr auto nx2 = 14; constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; - constexpr auto nx3 = 10; + constexpr auto nx3 = 17; constexpr auto nx3_gh = nx3 + 2 * N_GHOSTS; constexpr auto i1min = N_GHOSTS; constexpr auto i2min = N_GHOSTS; @@ -41,6 +41,10 @@ auto main(int argc, char* argv[]) -> int { constexpr auto i2max = nx2 + N_GHOSTS; constexpr auto i3max = nx3 + N_GHOSTS; + constexpr auto dwn1 = 2; + constexpr auto dwn2 = 1; + constexpr auto dwn3 = 5; + ndfield_t field { "fld", nx1_gh, nx2_gh, nx3_gh }; std::vector field_names; @@ -51,9 +55,12 @@ auto main(int argc, char* argv[]) -> int { CreateRangePolicy({ i1min, i2min, i3min }, { i1max, i2max, i3max }), Lambda(index_t i1, index_t i2, index_t i3) { - field(i1, i2, i3, 0) = i1 + i2 + i3; - field(i1, i2, i3, 1) = i1 * i2 / i3; - field(i1, i2, i3, 2) = i1 / i2 * i3; + const auto i1_ = static_cast(i1); + const auto i2_ = static_cast(i2); + const auto i3_ = static_cast(i3); + field(i1, i2, i3, 0) = i1_; + field(i1, i2, i3, 1) = i2_; + field(i1, i2, i3, 2) = i3_; }); } @@ -65,7 +72,8 @@ auto main(int argc, char* argv[]) -> int { writer.init(&adios, "hdf5", "test"); writer.defineMeshLayout({ nx1, nx2, nx3 }, { 0, 0, 0 }, - { nx1, nx3, nx3 }, + { nx1, nx2, nx3 }, + { dwn1, dwn2, dwn3 }, false, Coord::Cart); writer.defineFieldOutputs(SimEngine::SRPIC, { "E", "B", "Rho_1_3", "N_2" }); @@ -109,8 +117,7 @@ auto main(int argc, char* argv[]) -> int { "Time is not correct", HERE); - array_t field_read { "fld_read", nx1, nx2, nx3 }; - auto 
field_read_h = Kokkos::create_mirror_view(field_read); + array_t field_read {}; int cntr = 0; for (const auto& name : field_names) { @@ -124,12 +131,22 @@ auto main(int argc, char* argv[]) -> int { std::size_t nx1_r = dims[0]; std::size_t nx2_r = dims[1]; std::size_t nx3_r = dims[2]; - raise::ErrorIf( - (nx1_r != 10) || (nx2_r != 10) || (nx3_r != 10), - fmt::format("%s is not %dx%dx%d", name.c_str(), nx1_r, nx2_r, nx3_r), - HERE); + raise::ErrorIf((nx1_r != nx1 / dwn1) || (nx2_r != nx2 / dwn2) || + (nx3_r != nx3 / dwn3), + fmt::format("%s = %ldx%ldx%ld is not %dx%dx%d", + name.c_str(), + nx1_r, + nx2_r, + nx3_r, + nx1 / dwn1, + nx2 / dwn2, + nx3 / dwn3), + HERE); + fieldVar.SetSelection( adios2::Box({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r })); + field_read = array_t(name, nx1_r, nx2_r, nx3_r); + auto field_read_h = Kokkos::create_mirror_view(field_read); reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(field_read, field_read_h); @@ -137,19 +154,32 @@ auto main(int argc, char* argv[]) -> int { "check", CreateRangePolicy({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r }), Lambda(index_t i1, index_t i2, index_t i3) { - if (not cmp::AlmostEqual( - field_read(i1, i2, i3), - field(i1 + i1min, i2 + i2min, i3 + i3min, cntr))) { - printf("%e %e\n", + if (not cmp::AlmostEqual(field_read(i1, i2, i3), + field(i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr))) { + printf("\n:::::::::::::::\nfield_read(%ld, %ld, %ld) = %f != " + "field(%ld, %ld, %ld, %d) = %f\n:::::::::::::::\n", + i1, + i2, + i3, field_read(i1, i2, i3), - field(i1 + i1min, i2 + i2min, i3 + i3min, cntr)); + i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr, + field(i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr)); raise::KernelError(HERE, "Field is not read correctly"); } }); - ++cntr; } else { raise::Error("Field not found", HERE); } + ++cntr; } reader.EndStep(); } diff --git a/src/output/writer.cpp b/src/output/writer.cpp index 
3d526b306..5c6dfe6d6 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -60,25 +60,44 @@ namespace out { m_mode = mode; } - void Writer::defineMeshLayout(const std::vector& glob_shape, - const std::vector& loc_corner, - const std::vector& loc_shape, - bool incl_ghosts, - Coord coords) { - m_flds_ghosts = incl_ghosts; + void Writer::defineMeshLayout(const std::vector& glob_shape, + const std::vector& loc_corner, + const std::vector& loc_shape, + const std::vector& dwn, + bool incl_ghosts, + Coord coords) { + m_flds_ghosts = incl_ghosts; + m_dwn = dwn; + m_flds_g_shape = glob_shape; m_flds_l_corner = loc_corner; m_flds_l_shape = loc_shape; + for (std::size_t i { 0 }; i < glob_shape.size(); ++i) { + raise::ErrorIf(dwn[i] != 1 && incl_ghosts, + "Downsampling with ghosts not supported", + HERE); + m_flds_g_shape_dwn.push_back( + static_cast(glob_shape[i] / m_dwn[i])); + m_flds_l_corner_dwn.push_back( + static_cast(loc_corner[i] / m_dwn[i])); + m_flds_l_shape_dwn.push_back( + static_cast((loc_corner[i] + loc_shape[i]) / m_dwn[i]) - + static_cast(loc_corner[i] / m_dwn[i])); + m_flds_l_first.push_back( + static_cast(loc_corner[i] / m_dwn[i]) * m_dwn[i] - + loc_corner[i]); + } + m_io.DefineAttribute("NGhosts", incl_ghosts ? 
N_GHOSTS : 0); m_io.DefineAttribute("Dimension", m_flds_g_shape.size()); m_io.DefineAttribute("Coordinates", std::string(coords.to_string())); for (std::size_t i { 0 }; i < m_flds_g_shape.size(); ++i) { // cell-centers - adios2::Dims g_shape = { m_flds_g_shape[i] }; - adios2::Dims l_corner = { m_flds_l_corner[i] }; - adios2::Dims l_shape = { m_flds_l_shape[i] }; + adios2::Dims g_shape = { m_flds_g_shape_dwn[i] }; + adios2::Dims l_corner = { m_flds_l_corner_dwn[i] }; + adios2::Dims l_shape = { m_flds_l_shape_dwn[i] }; m_io.DefineVariable("X" + std::to_string(i + 1), g_shape, l_corner, @@ -87,8 +106,8 @@ namespace out { // cell-edges const auto is_last = (m_flds_l_corner[i] + m_flds_l_shape[i] == m_flds_g_shape[i]); - adios2::Dims g_shape1 = { m_flds_g_shape[i] + 1 }; - adios2::Dims l_shape1 = { m_flds_l_shape[i] + (is_last ? 1 : 0) }; + adios2::Dims g_shape1 = { m_flds_g_shape_dwn[i] + 1 }; + adios2::Dims l_shape1 = { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }; m_io.DefineVariable("X" + std::to_string(i + 1) + "e", g_shape1, l_corner, @@ -100,9 +119,6 @@ namespace out { Kokkos::LayoutRight>::value) { m_io.DefineAttribute("LayoutRight", 1); } else { - std::reverse(m_flds_g_shape.begin(), m_flds_g_shape.end()); - std::reverse(m_flds_l_corner.begin(), m_flds_l_corner.end()); - std::reverse(m_flds_l_shape.begin(), m_flds_l_shape.end()); m_io.DefineAttribute("LayoutRight", 0); } } @@ -110,8 +126,9 @@ namespace out { void Writer::defineFieldOutputs(const SimEngine& S, const std::vector& flds_out) { m_flds_writers.clear(); - raise::ErrorIf((m_flds_g_shape.size() == 0) || (m_flds_l_corner.size() == 0) || - (m_flds_l_shape.size() == 0), + raise::ErrorIf((m_flds_g_shape_dwn.size() == 0) || + (m_flds_l_corner_dwn.size() == 0) || + (m_flds_l_shape_dwn.size() == 0), "Mesh layout must be defined before field output", HERE); for (const auto& fld : flds_out) { @@ -119,17 +136,19 @@ namespace out { } for (const auto& fld : m_flds_writers) { if (fld.comp.size() == 0) { + // scalar 
m_io.DefineVariable(fld.name(), - m_flds_g_shape, - m_flds_l_corner, - m_flds_l_shape, + m_flds_g_shape_dwn, + m_flds_l_corner_dwn, + m_flds_l_shape_dwn, adios2::ConstantDims); } else { + // vector or tensor for (std::size_t i { 0 }; i < fld.comp.size(); ++i) { m_io.DefineVariable(fld.name(i), - m_flds_g_shape, - m_flds_l_corner, - m_flds_l_shape, + m_flds_g_shape_dwn, + m_flds_l_corner_dwn, + m_flds_l_shape_dwn, adios2::ConstantDims); } } @@ -178,48 +197,105 @@ namespace out { } template - void WriteField(adios2::IO& io, - adios2::Engine& writer, - const std::string& varname, - const ndfield_t& field, - std::size_t comp, - bool ghosts) { - auto var = io.InquireVariable(varname); - const auto gh_zones = ghosts ? 0 : N_GHOSTS; + void WriteField(adios2::IO& io, + adios2::Engine& writer, + const std::string& varname, + const ndfield_t& field, + std::size_t comp, + std::vector dwn, + std::vector first_cell, + bool ghosts) { + // when dwn != 1 in any direction, it is assumed that ghosts == false + auto var = io.InquireVariable(varname); + const auto gh_zones = ghosts ? 
0 : N_GHOSTS; + ndarray_t output_field {}; if constexpr (D == Dim::_1D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, comp); - auto output_field = array_t("output_field", slice.extent(0)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, output_field); - writer.Put(var, output_field_host); + if (ghosts || dwn[0] == 1) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, comp); + output_field = array_t { "output_field", slice.extent(0) }; + Kokkos::deep_copy(output_field, slice); + } else { + const auto dwn1 = dwn[0]; + const auto nx1_dwn = static_cast( + (field.extent(0) - 2 * N_GHOSTS) / dwn1); + const auto first_cell1 = first_cell[0]; + output_field = array_t { "output_field", nx1_dwn }; + Kokkos::parallel_for( + "outputField", + nx1_dwn, + Lambda(index_t i1) { + output_field(i1) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, comp); + }); + } } else if constexpr (D == Dim::_2D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, slice_i2, comp); - auto output_field = array_t("output_field", + if (ghosts || (dwn[0] == 1 && dwn[1] == 1)) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, slice_i2, comp); + output_field = array_t { "output_field", slice.extent(0), - slice.extent(1)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, output_field); - writer.Put(var, output_field_host); + slice.extent(1) }; + Kokkos::deep_copy(output_field, slice); + } else 
{ + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const auto nx1_dwn = static_cast( + (field.extent(0) - 2 * N_GHOSTS) / dwn1); + const auto nx2_dwn = static_cast( + (field.extent(1) - 2 * N_GHOSTS) / dwn2); + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + output_field = array_t { "output_field", nx1_dwn, nx2_dwn }; + Kokkos::parallel_for( + "outputField", + CreateRangePolicy({ 0, 0 }, { nx1_dwn, nx2_dwn }), + Lambda(index_t i1, index_t i2) { + output_field(i1, i2) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, + first_cell2 + i2 * dwn2 + N_GHOSTS, + comp); + }); + } } else if constexpr (D == Dim::_3D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); - auto slice_i3 = range_tuple_t(gh_zones, field.extent(2) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, slice_i2, slice_i3, comp); - auto output_field = array_t("output_field", - slice.extent(0), - slice.extent(1), - slice.extent(2)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, output_field); - writer.Put(var, output_field_host); + if (ghosts || (dwn[0] == 1 && dwn[1] == 1 && dwn[2] == 1)) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); + auto slice_i3 = range_tuple_t(gh_zones, field.extent(2) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, slice_i2, slice_i3, comp); + output_field = array_t { "output_field", + slice.extent(0), + slice.extent(1), + slice.extent(2) }; + Kokkos::deep_copy(output_field, slice); + } else { + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const auto dwn3 = dwn[2]; + const auto nx1_dwn = static_cast( + (field.extent(0) - 2 * N_GHOSTS) / dwn1); + const auto nx2_dwn = static_cast( + (field.extent(1) - 2 * N_GHOSTS) / 
dwn2); + const auto nx3_dwn = static_cast( + (field.extent(2) - 2 * N_GHOSTS) / dwn3); + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + const auto first_cell3 = first_cell[2]; + output_field = array_t { "output_field", nx1_dwn, nx2_dwn, nx3_dwn }; + Kokkos::parallel_for( + "outputField", + CreateRangePolicy({ 0, 0, 0 }, { nx1_dwn, nx2_dwn, nx3_dwn }), + Lambda(index_t i1, index_t i2, index_t i3) { + output_field(i1, i2, i3) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, + first_cell2 + i2 * dwn2 + N_GHOSTS, + first_cell3 + i3 * dwn3 + N_GHOSTS, + comp); + }); + } } + auto output_field_h = Kokkos::create_mirror_view(output_field); + Kokkos::deep_copy(output_field_h, output_field); + writer.Put(var, output_field_h); } template @@ -233,7 +309,14 @@ namespace out { "# of names != # of addresses ", HERE); for (std::size_t i { 0 }; i < addresses.size(); ++i) { - WriteField(m_io, m_writer, names[i], fld, addresses[i], m_flds_ghosts); + WriteField(m_io, + m_writer, + names[i], + fld, + addresses[i], + m_dwn, + m_flds_l_first, + m_flds_ghosts); } } @@ -360,36 +443,48 @@ namespace out { const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); } // namespace out diff --git a/src/output/writer.h 
b/src/output/writer.h index ba24a3d65..566da44b2 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -37,14 +37,25 @@ namespace out { adios2::Mode m_mode { adios2::Mode::Write }; // global shape of the fields array to output - adios2::Dims m_flds_g_shape; + std::vector m_flds_g_shape; // local corner of the fields array to output - adios2::Dims m_flds_l_corner; + std::vector m_flds_l_corner; // local shape of the fields array to output - adios2::Dims m_flds_l_shape; - bool m_flds_ghosts; - std::string m_engine; - std::string m_fname; + std::vector m_flds_l_shape; + + // downsampling factors for each dimension + std::vector m_dwn; + // starting cell in each dimension (not including ghosts) + std::vector m_flds_l_first; + + // same but downsampled + adios2::Dims m_flds_g_shape_dwn; + adios2::Dims m_flds_l_corner_dwn; + adios2::Dims m_flds_l_shape_dwn; + + bool m_flds_ghosts; + std::string m_engine; + std::string m_fname; std::map m_trackers; @@ -73,7 +84,8 @@ namespace out { void defineMeshLayout(const std::vector&, const std::vector&, const std::vector&, - bool incl_ghosts, + const std::vector&, + bool, Coord); void defineFieldOutputs(const SimEngine&, const std::vector&); From 24691ea2fe8a8ae6ca48e2a9e082e8b099274b73 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:28:17 -0400 Subject: [PATCH 016/124] added a cpu runner for actions (totest) --- .github/workflows/actions.yml | 4 +- dev/runners/Dockerfile.runner.cpu | 73 +++++++++++++++++++++++++++++++ dev/runners/README.md | 6 +++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 dev/runners/Dockerfile.runner.cpu diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index cd7119789..331d6526c 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - device: [amd-gpu, nvidia-gpu] + device: [cpu, amd-gpu, nvidia-gpu] precision: [double, single] exclude: - device: amd-gpu @@ 
-35,6 +35,8 @@ jobs: fi elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" + elif [ "${{ matrix.device }}" = "cpu" ]; then + FLAGS="" fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile diff --git a/dev/runners/Dockerfile.runner.cpu b/dev/runners/Dockerfile.runner.cpu new file mode 100644 index 000000000..fc13ec9b5 --- /dev/null +++ b/dev/runners/Dockerfile.runner.cpu @@ -0,0 +1,73 @@ +FROM ubuntu:22.04 + +ARG DEBIAN_FRONTEND=noninteractive + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y sudo wget curl build-essential && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src + +ENV HDF5_ROOT=/usr +ENV ADIOS2_DIR=/opt/adios2 +ENV PATH=/opt/adios2/bin:$PATH + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +ARG USER=runner +RUN useradd -ms /usr/bin/zsh 
$USER && \ + usermod -aG sudo $USER && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +USER $USER +ARG HOME=/home/$USER +WORKDIR $HOME + +# gh runner +ARG RUNNER_VERSION=2.317.0 +RUN mkdir actions-runner +WORKDIR $HOME/actions-runner + +RUN curl -o actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \ + -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + sudo ./bin/installdependencies.sh + +ADD start.sh start.sh +RUN sudo chown $USER:$USER start.sh && \ + sudo chmod +x start.sh + +ENTRYPOINT ["./start.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 08d0cd176..957898fa7 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -19,3 +19,9 @@ docker run -e TOKEN= -e LABEL=nvidia-gpu --runtime=nvidia --gpus=all -dt docker build -t ghrunner:amd -f Dockerfile.runner.rocm . docker run -e TOKEN= -e LABEL=amd-gpu --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd ``` + +### CPU + +```sh +docker build -t ghrunner:cpu -f Dockerfile.runner.cpu . 
+``` From e3d40c3b10d2088d95acd215713ac934283a4488 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:30:59 -0400 Subject: [PATCH 017/124] RUNTEST --- .github/workflows/actions.yml | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 331d6526c..4db596243 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -1,27 +1,36 @@ name: Unit tests on: - pull_request: - branches: - - '**rc' - - 'master' + push: jobs: + check-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Check commit message + id: check_message + run: | + if git log -1 --pretty=%B | grep -q "RUNTEST"; then + echo "::set-output name=run_tests::true" + else + echo "::set-output name=run_tests::false" + exit 1 tests: + needs: check-commit + if: steps.check_message.outputs.run_tests == 'true' strategy: fail-fast: false matrix: device: [cpu, amd-gpu, nvidia-gpu] precision: [double, single] - exclude: + exclude: # my AMD GPU doesn't support fp64 atomics : ( - device: amd-gpu precision: double - # my AMD GPU doesn't support fp64 atomics : ( - runs-on: [self-hosted, "${{ matrix.device }}"] - if: contains(github.event.head_commit.message, 'totest') steps: - name: Checkout - uses: actions/checkout@v3.3.0 + uses: actions/checkout@v4 - name: Configure run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then From 81f5df0baee2eac88d7220f9d29b6abf4ac2ffa2 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:33:16 -0400 Subject: [PATCH 018/124] RUNTEST --- .github/workflows/actions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 4db596243..703f97a6a 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -28,6 +28,7 @@ jobs: exclude: # my AMD GPU doesn't support fp64 atomics : ( - device: amd-gpu precision: 
double + runs-on: [self-hosted, "${{ matrix.device }}"] steps: - name: Checkout uses: actions/checkout@v4 From 7b7382bc90427654c1baf0b23e56adfdab969027 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:35:26 -0400 Subject: [PATCH 019/124] RUNTEST --- .github/workflows/actions.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 703f97a6a..3af457b6e 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -6,6 +6,8 @@ on: jobs: check-commit: runs-on: ubuntu-latest + outputs: + run_tests: ${{ steps.check_message.outputs.run_tests }} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -19,7 +21,7 @@ jobs: exit 1 tests: needs: check-commit - if: steps.check_message.outputs.run_tests == 'true' + if: needs.check-commit.outputs.run_tests == 'true' strategy: fail-fast: false matrix: From 0a465ad47accef51a4767eb3302182d1fcf1c2ed Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 16 Oct 2024 04:38:12 -0400 Subject: [PATCH 020/124] RUNTEST --- .github/workflows/actions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 3af457b6e..22f55d8be 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -19,6 +19,7 @@ jobs: else echo "::set-output name=run_tests::false" exit 1 + fi tests: needs: check-commit if: needs.check-commit.outputs.run_tests == 'true' From a52d07d3aad40f53500eb052836d5016141fc8d1 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Wed, 16 Oct 2024 08:53:39 -0400 Subject: [PATCH 021/124] Bugfix in moment calculation. 
--- src/kernels/particle_moments.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 83caea563..904dc3b08 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -261,10 +261,7 @@ namespace kernel { } // compute the corresponding moment coeff = u_Phys[c1 - 1] / gamma; - } else { - // for other cases, use the `contrib` defined above - coeff = contrib; - } + } if constexpr (F != FldsID::Nppc) { // for nppc calculation ... From 0caddf9f24103cc5bd1539527921d684261cc06e Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 17 Oct 2024 07:42:12 -0400 Subject: [PATCH 022/124] WIP proper mpi test for checkpoints --- cmake/tests.cmake | 2 +- src/checkpoint/tests/checkpoint-mpi.cpp | 223 ++++++++++++++++++++++++ src/framework/CMakeLists.txt | 1 - src/framework/domain/metadomain.h | 2 +- src/output/tests/writer-mpi.cpp | 7 +- 5 files changed, 229 insertions(+), 6 deletions(-) create mode 100644 src/checkpoint/tests/checkpoint-mpi.cpp diff --git a/cmake/tests.cmake b/cmake/tests.cmake index f1342f679..643ac3d29 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -17,7 +17,7 @@ if (${mpi}) # tests with mpi if (${output}) add_subdirectory(${SRC_DIR}/output/tests ${CMAKE_CURRENT_BINARY_DIR}/output/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) + add_subdirectory(${SRC_DIR}/checkpoint/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) endif() else() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp new file mode 100644 index 000000000..3ce4bab14 --- /dev/null +++ b/src/checkpoint/tests/checkpoint-mpi.cpp @@ -0,0 +1,223 @@ +#include "enums.h" +#include "global.h" + +#include "utils/comparators.h" + +#include "checkpoint/reader.h" +#include "checkpoint/writer.h" + +#include 
+#include +#include +#include + +#include +#include +#include + +using namespace ntt; +using namespace checkpoint; + +void cleanup() { + namespace fs = std::filesystem; + fs::path temp_path { "checkpoints" }; + fs::remove_all(temp_path); +} + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + MPI_Init(&argc, &argv); + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + try { + // assuming 4 ranks + // |------|------| + // | 2 | 3 | + // |------|------| + // | | | + // | 0 | 1 | + // |------|------| + constexpr auto g_nx1 = 20; + constexpr auto g_nx2 = 15; + constexpr auto g_nx1_gh = g_nx1 + 4 * N_GHOSTS; + constexpr auto g_nx2_gh = g_nx2 + 4 * N_GHOSTS; + + constexpr auto l_nx1 = 10; + constexpr auto l_nx2 = (rank < 2) ? 10 : 5; + + constexpr auto l_nx1_gh = l_nx1 + 2 * N_GHOSTS; + constexpr auto l_nx2_gh = l_nx2 + 2 * N_GHOSTS; + + constexpr auto l_corner_x1 = (rank % 2) * l_nx1; + constexpr auto l_corner_x2 = (rank / 2) * l_nx2; + + constexpr auto i1min = N_GHOSTS; + constexpr auto i2min = N_GHOSTS; + constexpr auto i1max = l_nx1 + N_GHOSTS; + constexpr auto i2max = l_nx2 + N_GHOSTS; + + constexpr auto npart1 = (rank % 2 + rank) * 23 + 100; + constexpr auto npart2 = (rank % 2 + rank) * 37 + 100; + + // init data + ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; + ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; + + array_t i1 { "i_1", npart1 }; + array_t u1 { "u_1", npart1 }; + array_t i2 { "i_2", npart2 }; + array_t u2 { "u_2", npart2 }; + + { + // fill data + Kokkos::parallel_for( + "fillFlds", + CreateRangePolicy({ i1min, i2min }, { i1max, i2max }), + Lambda(index_t i1, index_t i2) { + field1(i1, i2, 0) = static_cast(i1 + i2); + field1(i1, i2, 1) = static_cast(i1 * i2); + field1(i1, i2, 2) = static_cast(i1 / i2); + field1(i1, i2, 3) = static_cast(i1 - i2); + field1(i1, i2, 4) = static_cast(i2 / i1); + field1(i1, i2, 5) = static_cast(i1); + field2(i1, i2, 0) = static_cast(-(i1 + i2)); 
+ field2(i1, i2, 1) = static_cast(-(i1 * i2)); + field2(i1, i2, 2) = static_cast(-(i1 / i2)); + field2(i1, i2, 3) = static_cast(-(i1 - i2)); + field2(i1, i2, 4) = static_cast(-(i2 / i1)); + field2(i1, i2, 5) = static_cast(-i1); + }); + Kokkos::parallel_for( + "fillPrtl1", + npart1, + Lambda(index_t p) { + u1(p) = static_cast(p); + i1(p) = static_cast(p); + }); + Kokkos::parallel_for( + "fillPrtl2", + npart2, + Lambda(index_t p) { + u2(p) = -static_cast(p); + i2(p) = -static_cast(p); + }); + } + + adios2::ADIOS adios; + + { + // write checkpoint + Writer writer; + writer.init(&adios, 0, 0.0, 1); + + writer.defineFieldVariables(SimEngine::GRPIC, + { g_nx1_gh, g_nx2_gh }, + { l_corner_x1, l_corner_x2 }, + { l_nx1, l_nx2 }); + writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 0, 0 }); + + writer.beginSaving(0, 0.0); + + writer.saveField("em", field1); + writer.saveField("em0", field2); + + writer.savePerDomainVariable("s1_npart", 1, 0, npart1); + writer.savePerDomainVariable("s2_npart", 1, 0, npart2); + + writer.saveParticleQuantity("s1_i1", npart1, 0, npart1, i1); + writer.saveParticleQuantity("s1_ux1", npart1, 0, npart1, u1); + writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); + writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); + + writer.endSaving(); + } + + { + // read checkpoint + ndfield_t field1_read { "fld1_read", nx1_gh, nx2_gh, nx3_gh }; + ndfield_t field2_read { "fld2_read", nx1_gh, nx2_gh, nx3_gh }; + + array_t i1_read { "i_1", npart1 }; + array_t u1_read { "u_1", npart1 }; + array_t i2_read { "i_2", npart2 }; + array_t u2_read { "u_2", npart2 }; + + adios2::IO io = adios.DeclareIO("checkpointRead"); + adios2::Engine reader = io.Open("checkpoints/step-00000000.bp", + adios2::Mode::Read); + reader.BeginStep(); + + auto fieldRange = adios2::Box({ 0, 0, 0, 0 }, + { nx1_gh, nx2_gh, nx3_gh, 6 }); + ReadFields(io, reader, "em", fieldRange, field1_read); + ReadFields(io, reader, "em0", fieldRange, field2_read); + + auto [nprtl1, 
noff1] = ReadParticleCount(io, reader, 0, 0, 1); + auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, 0, 1); + + ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); + ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); + ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); + ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + + reader.EndStep(); + reader.Close(); + + // check the validity + Kokkos::parallel_for( + "checkFields", + CreateRangePolicy({ 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }), + Lambda(index_t i1, index_t i2, index_t i3) { + for (int i = 0; i < 6; ++i) { + if (not cmp::AlmostEqual(field1(i1, i2, i3, i), + field1_read(i1, i2, i3, i))) { + raise::KernelError(HERE, "Field1 read failed"); + } + if (not cmp::AlmostEqual(field2(i1, i2, i3, i), + field2_read(i1, i2, i3, i))) { + raise::KernelError(HERE, "Field2 read failed"); + } + } + }); + + raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); + raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); + raise::ErrorIf(noff1 != 0, "Particle offset 1 mismatch", HERE); + raise::ErrorIf(noff2 != 0, "Particle offset 2 mismatch", HERE); + + Kokkos::parallel_for( + "checkPrtl1", + npart1, + Lambda(index_t p) { + if (not cmp::AlmostEqual(u1(p), u1_read(p))) { + raise::KernelError(HERE, "u1 read failed"); + } + if (i1(p) != i1_read(p)) { + raise::KernelError(HERE, "i1 read failed"); + } + }); + Kokkos::parallel_for( + "checkPrtl2", + npart2, + Lambda(index_t p) { + if (not cmp::AlmostEqual(u2(p), u2_read(p))) { + raise::KernelError(HERE, "u2 read failed"); + } + if (i2(p) != i2_read(p)) { + raise::KernelError(HERE, "i2 read failed"); + } + }); + } + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + cleanup(); + Kokkos::finalize(); + return 1; + } + cleanup(); + Kokkos::finalize(); + return 0; +} diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 241780575..e01759d14 100644 --- 
a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -32,7 +32,6 @@ set(SOURCES ${SRC_DIR}/domain/grid.cpp ${SRC_DIR}/domain/metadomain.cpp ${SRC_DIR}/domain/communications.cpp - ${SRC_DIR}/domain/checkpoint.cpp ${SRC_DIR}/containers/particles.cpp ${SRC_DIR}/containers/fields.cpp ) diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 027a2982d..7b3042b5b 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -21,7 +21,6 @@ #include "arch/kokkos_aliases.h" #include "utils/timer.h" -#include "checkpoint/writer.h" #include "framework/containers/species.h" #include "framework/domain/domain.h" #include "framework/domain/mesh.h" @@ -32,6 +31,7 @@ #endif // MPI_ENABLED #if defined(OUTPUT_ENABLED) + #include "checkpoint/writer.h" #include "output/writer.h" #include diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 6b810fa22..074b9acf0 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -36,10 +36,11 @@ auto main(int argc, char* argv[]) -> int { adios2::ADIOS adios { MPI_COMM_WORLD }; auto writer = out::Writer(); - writer.init(&adios, "hdf5"); + writer.init(&adios, "hdf5", "test"); writer.defineMeshLayout({ static_cast(size) * 10 }, { static_cast(rank) * 10 }, { 10 }, + { 1 }, false, Coord::Cart); writer.defineFieldOutputs(SimEngine::SRPIC, { "E" }); @@ -59,11 +60,11 @@ auto main(int argc, char* argv[]) -> int { names.push_back(writer.fieldWriters()[0].name(i)); addresses.push_back(i); } - writer.beginWriting("test", 0, 0.0); + writer.beginWriting(0, 0.0); writer.writeField(names, field, addresses); writer.endWriting(); - writer.beginWriting("test", 1, 0.1); + writer.beginWriting(1, 0.1); writer.writeField(names, field, addresses); writer.endWriting(); From 0cf9baf37667726f8d2e1a1711d5a7c2ae4980c3 Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Wed, 23 Oct 2024 14:18:31 -0400 Subject: [PATCH 023/124] bookkeeping 
for downsampling --- src/framework/domain/output.cpp | 21 +++++++++++++++++---- src/output/writer.cpp | 27 ++++++++++++++++++++------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 46ca95ede..4a8871324 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -224,10 +224,16 @@ namespace ntt { for (unsigned short dim = 0; dim < M::Dim; ++dim) { const auto l_size = local_domain->mesh.n_active()[dim]; const auto l_offset = local_domain->offset_ncells()[dim]; + //std::cout << "offset " << l_offset << " " << dim << std::endl; const auto g_size = mesh().n_active()[dim]; const auto dwn_in_dim = dwn[dim]; - const auto l_size_dwn = static_cast(l_size / dwn_in_dim); + double n {l_size}; + double d {dwn_in_dim}; + double l {l_offset}; + double f = math::ceil(l/d)*d-l; + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n-f) / d)); const auto is_last = l_offset + l_size == g_size; @@ -239,9 +245,12 @@ namespace ntt { const auto offset = (incl_ghosts ? 
N_GHOSTS : 0); const auto ncells = l_size_dwn; - const auto first_cell = static_cast(l_offset / dwn_in_dim) * - dwn_in_dim - - l_offset; + //const auto first_cell = ((static_cast(l_offset / dwn_in_dim) + 1) * dwn_in_dim - l_offset) % dwn_in_dim; + //const auto first_cell = static_cast(l_offset / dwn_in_dim) * + // dwn_in_dim - + // l_offset; + std::cout << "first cell " << first_cell << " dim " << dim << " l_offset " << l_offset << " dwn_in_dim " << dwn_in_dim << std::endl; + const auto& metric = local_domain->mesh.metric; @@ -251,8 +260,12 @@ namespace ntt { Lambda(index_t i_dwn) { const auto i = first_cell + i_dwn * dwn_in_dim; const auto i_ = static_cast(i); + //if (dim == 1){ + // printf(" i %lu and %f \n", i, i_ ); + // } coord_t x_Cd { ZERO }, x_Ph { ZERO }; x_Cd[dim] = i_ + HALF; + // TODO : change to convert by component metric.template convert(x_Cd, x_Ph); xc(offset + i_dwn) = x_Ph[dim]; x_Cd[dim] = i_; diff --git a/src/output/writer.cpp b/src/output/writer.cpp index 5c6dfe6d6..a63fafa51 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -77,17 +77,26 @@ namespace out { raise::ErrorIf(dwn[i] != 1 && incl_ghosts, "Downsampling with ghosts not supported", HERE); + + double g = glob_shape[i]; + double d = m_dwn[i]; + double l = loc_corner[i]; + double n = loc_shape[i]; + double f = math::ceil(l/d)*d-l; m_flds_g_shape_dwn.push_back( - static_cast(glob_shape[i] / m_dwn[i])); + static_cast(math::ceil(g / d))); m_flds_l_corner_dwn.push_back( - static_cast(loc_corner[i] / m_dwn[i])); - m_flds_l_shape_dwn.push_back( - static_cast((loc_corner[i] + loc_shape[i]) / m_dwn[i]) - - static_cast(loc_corner[i] / m_dwn[i])); + static_cast(math::ceil(l /d))); m_flds_l_first.push_back( - static_cast(loc_corner[i] / m_dwn[i]) * m_dwn[i] - - loc_corner[i]); + static_cast(f)); + + m_flds_l_shape_dwn.push_back( + static_cast(math::ceil((n - f) / d))); + //m_flds_l_first.push_back( + // static_cast(loc_corner[i] / m_dwn[i]) * m_dwn[i] - + // loc_corner[i]); } + 
m_io.DefineAttribute("NGhosts", incl_ghosts ? N_GHOSTS : 0); m_io.DefineAttribute("Dimension", m_flds_g_shape.size()); @@ -119,6 +128,9 @@ namespace out { Kokkos::LayoutRight>::value) { m_io.DefineAttribute("LayoutRight", 1); } else { + std::reverse(m_flds_g_shape_dwn.begin(), m_flds_g_shape_dwn.end()); + std::reverse(m_flds_l_corner_dwn.begin(), m_flds_l_corner_dwn.end()); + std::reverse(m_flds_l_shape_dwn.begin(), m_flds_l_shape_dwn.end()); m_io.DefineAttribute("LayoutRight", 0); } } @@ -247,6 +259,7 @@ namespace out { (field.extent(1) - 2 * N_GHOSTS) / dwn2); const auto first_cell1 = first_cell[0]; const auto first_cell2 = first_cell[1]; + printf("%ld %ld : %ld %ld \n", nx1_dwn, nx2_dwn, first_cell1, first_cell2); output_field = array_t { "output_field", nx1_dwn, nx2_dwn }; Kokkos::parallel_for( "outputField", From a0b1ff244c0448e8434c5931847ff3b76d54e0de Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Wed, 23 Oct 2024 22:05:07 -0400 Subject: [PATCH 024/124] correct downsampling of field output --- src/framework/domain/output.cpp | 16 ++------ src/output/writer.cpp | 68 +++++++++++++++++---------------- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 4a8871324..ac2108a75 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -224,13 +224,12 @@ namespace ntt { for (unsigned short dim = 0; dim < M::Dim; ++dim) { const auto l_size = local_domain->mesh.n_active()[dim]; const auto l_offset = local_domain->offset_ncells()[dim]; - //std::cout << "offset " << l_offset << " " << dim << std::endl; const auto g_size = mesh().n_active()[dim]; const auto dwn_in_dim = dwn[dim]; - double n {l_size}; - double d {dwn_in_dim}; - double l {l_offset}; + double n = l_size; + double d = dwn_in_dim; + double l = l_offset; double f = math::ceil(l/d)*d-l; const auto first_cell = static_cast(f); const auto l_size_dwn = static_cast(math::ceil((n-f) / d)); @@ 
-245,13 +244,7 @@ namespace ntt { const auto offset = (incl_ghosts ? N_GHOSTS : 0); const auto ncells = l_size_dwn; - //const auto first_cell = ((static_cast(l_offset / dwn_in_dim) + 1) * dwn_in_dim - l_offset) % dwn_in_dim; - //const auto first_cell = static_cast(l_offset / dwn_in_dim) * - // dwn_in_dim - - // l_offset; - std::cout << "first cell " << first_cell << " dim " << dim << " l_offset " << l_offset << " dwn_in_dim " << dwn_in_dim << std::endl; - const auto& metric = local_domain->mesh.metric; Kokkos::parallel_for( @@ -260,9 +253,6 @@ namespace ntt { Lambda(index_t i_dwn) { const auto i = first_cell + i_dwn * dwn_in_dim; const auto i_ = static_cast(i); - //if (dim == 1){ - // printf(" i %lu and %f \n", i, i_ ); - // } coord_t x_Cd { ZERO }, x_Ph { ZERO }; x_Cd[dim] = i_ + HALF; // TODO : change to convert by component diff --git a/src/output/writer.cpp b/src/output/writer.cpp index a63fafa51..b35e3ab17 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -83,18 +83,10 @@ namespace out { double l = loc_corner[i]; double n = loc_shape[i]; double f = math::ceil(l/d)*d-l; - m_flds_g_shape_dwn.push_back( - static_cast(math::ceil(g / d))); - m_flds_l_corner_dwn.push_back( - static_cast(math::ceil(l /d))); - m_flds_l_first.push_back( - static_cast(f)); - - m_flds_l_shape_dwn.push_back( - static_cast(math::ceil((n - f) / d))); - //m_flds_l_first.push_back( - // static_cast(loc_corner[i] / m_dwn[i]) * m_dwn[i] - - // loc_corner[i]); + m_flds_g_shape_dwn.push_back(static_cast(math::ceil(g / d))); + m_flds_l_corner_dwn.push_back(static_cast(math::ceil(l /d))); + m_flds_l_first.push_back(static_cast(f)); + m_flds_l_shape_dwn.push_back(static_cast(math::ceil((n - f) / d))); } @@ -229,10 +221,14 @@ namespace out { output_field = array_t { "output_field", slice.extent(0) }; Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const auto nx1_dwn = static_cast( - (field.extent(0) - 2 * N_GHOSTS) / dwn1); + + const auto dwn1 = dwn[0]; 
+ const double first_cell1_d = first_cell[0]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; const auto first_cell1 = first_cell[0]; + + const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); + output_field = array_t { "output_field", nx1_dwn }; Kokkos::parallel_for( "outputField", @@ -251,15 +247,17 @@ namespace out { slice.extent(1) }; Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const auto dwn2 = dwn[1]; - const auto nx1_dwn = static_cast( - (field.extent(0) - 2 * N_GHOSTS) / dwn1); - const auto nx2_dwn = static_cast( - (field.extent(1) - 2 * N_GHOSTS) / dwn2); + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; const auto first_cell1 = first_cell[0]; const auto first_cell2 = first_cell[1]; - printf("%ld %ld : %ld %ld \n", nx1_dwn, nx2_dwn, first_cell1, first_cell2); + + const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast(math::ceil((nx2_full - first_cell2_d) / dwn2)); output_field = array_t { "output_field", nx1_dwn, nx2_dwn }; Kokkos::parallel_for( "outputField", @@ -282,18 +280,24 @@ namespace out { slice.extent(2) }; Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const auto dwn2 = dwn[1]; - const auto dwn3 = dwn[2]; - const auto nx1_dwn = static_cast( - (field.extent(0) - 2 * N_GHOSTS) / dwn1); - const auto nx2_dwn = static_cast( - (field.extent(1) - 2 * N_GHOSTS) / dwn2); - const auto nx3_dwn = static_cast( - (field.extent(2) - 2 * N_GHOSTS) / dwn3); + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const auto dwn3 = dwn[2]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double first_cell3_d = first_cell[2]; + const double 
nx1_full = field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; + const double nx3_full = field.extent(2) - 2 * N_GHOSTS; const auto first_cell1 = first_cell[0]; const auto first_cell2 = first_cell[1]; - const auto first_cell3 = first_cell[2]; + const auto first_cell3 = first_cell[2]; + + const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast(math::ceil((nx2_full - first_cell2_d) / dwn2)); + const auto nx3_dwn = static_cast(math::ceil((nx3_full - first_cell3_d) / dwn3)); + + output_field = array_t { "output_field", nx1_dwn, nx2_dwn, nx3_dwn }; Kokkos::parallel_for( "outputField", From 905fb98167d5ffe304ad8d65372ea84deb82a1b3 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 13:24:33 -0500 Subject: [PATCH 025/124] MPI write test --- .gitignore | 1 + src/checkpoint/reader.cpp | 2 +- src/engines/engine_init.cpp | 6 + src/framework/domain/output.cpp | 18 +-- src/output/tests/writer-mpi.cpp | 176 ++++++++++++++++++++++++------ src/output/tests/writer-nompi.cpp | 18 +-- src/output/writer.cpp | 93 ++++++++-------- 7 files changed, 218 insertions(+), 96 deletions(-) diff --git a/.gitignore b/.gitignore index 53d09b648..a1b05e751 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ venv/ # CMake testing files Testing/ +.clangd .schema.json *_old/ action-token diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index 66fcd6757..e89b7d384 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -35,7 +35,7 @@ namespace checkpoint { reader.Get(field_var, array_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Field variable: %s not found", field), HERE); + raise::Error(fmt::format("Field variable: %s not found", field.c_str()), HERE); } } diff --git a/src/engines/engine_init.cpp b/src/engines/engine_init.cpp index e4ce9fa5f..0239724e1 100644 --- a/src/engines/engine_init.cpp +++ 
b/src/engines/engine_init.cpp @@ -50,6 +50,7 @@ namespace ntt { }); } } else { +#if defined(OUTPUT_ENABLED) // read simulation data from the checkpoint raise::ErrorIf( m_params.template get("checkpoint.start_step") == 0, @@ -57,6 +58,11 @@ namespace ntt { HERE); logger::Checkpoint("Resuming simulation from a checkpoint", HERE); m_metadomain.ContinueFromCheckpoint(&m_adios, m_params); +#else + raise::Error( + "Resuming simulation from a checkpoint requires -D output=ON", + HERE); +#endif } } } diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index ac2108a75..c7cb6bb65 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -227,12 +227,14 @@ namespace ntt { const auto g_size = mesh().n_active()[dim]; const auto dwn_in_dim = dwn[dim]; - double n = l_size; - double d = dwn_in_dim; - double l = l_offset; - double f = math::ceil(l/d)*d-l; - const auto first_cell = static_cast(f); - const auto l_size_dwn = static_cast(math::ceil((n-f) / d)); + + const double n = l_size; + const double d = dwn_in_dim; + const double l = l_offset; + const double f = math::ceil(l / d) * d - l; + + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); const auto is_last = l_offset + l_size == g_size; @@ -244,7 +246,7 @@ namespace ntt { const auto offset = (incl_ghosts ? 
N_GHOSTS : 0); const auto ncells = l_size_dwn; - + const auto& metric = local_domain->mesh.metric; Kokkos::parallel_for( @@ -255,7 +257,7 @@ namespace ntt { const auto i_ = static_cast(i); coord_t x_Cd { ZERO }, x_Ph { ZERO }; x_Cd[dim] = i_ + HALF; - // TODO : change to convert by component + // TODO : change to convert by component metric.template convert(x_Cd, x_Ph); xc(offset + i_dwn) = x_Ph[dim]; x_Cd[dim] = i_; diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 074b9acf0..c2729f658 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -2,9 +2,7 @@ #include "global.h" #include "arch/mpi_aliases.h" -#include "utils/formatting.h" -#include "output/fields.h" #include "output/writer.h" #include @@ -14,7 +12,6 @@ #include #include -#include #include #include @@ -24,49 +21,154 @@ void cleanup() { fs::remove(tempfile_path); } +#define CEILDIV(a, b) \ + (static_cast(math::ceil(static_cast(a) / static_cast(b)))) + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); MPI_Init(&argc, &argv); - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + int mpi_rank, mpi_size; + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); try { using namespace ntt; + constexpr auto nx1 = 10; + constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; + constexpr auto i1min = N_GHOSTS; + constexpr auto i1max = nx1 + N_GHOSTS; + constexpr auto dwn1 = 3; + + ndfield_t field { "fld", nx1_gh }; + std::vector field_names; + + { + // fill data + Kokkos::parallel_for( + "fill", + CreateRangePolicy({ i1min }, { i1max }), + Lambda(index_t i1) { + const auto i1_ = static_cast(i1); + field(i1, 0) = i1_; + field(i1, 1) = -i1_; + field(i1, 2) = SQR(i1_); + }); + } adios2::ADIOS adios { MPI_COMM_WORLD }; - auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test"); - writer.defineMeshLayout({ static_cast(size) * 10 }, - { static_cast(rank) * 10 
}, - { 10 }, - { 1 }, - false, - Coord::Cart); - writer.defineFieldOutputs(SimEngine::SRPIC, { "E" }); - - ndfield_t field { "fld", 10 + 2 * N_GHOSTS }; - Kokkos::parallel_for( - "fill", - CreateRangePolicy({ N_GHOSTS }, { 10 + N_GHOSTS }), - Lambda(index_t i1) { - field(i1, 0) = i1; - field(i1, 1) = -(real_t)(i1); - field(i1, 2) = i1 / 2; - }); - std::vector names; - std::vector addresses; - for (auto i = 0; i < 3; ++i) { - names.push_back(writer.fieldWriters()[0].name(i)); - addresses.push_back(i); + + { + // write + auto writer = out::Writer(); + writer.init(&adios, "hdf5", "test"); + writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, + { static_cast(mpi_rank) * nx1 }, + { nx1 }, + { dwn1 }, + false, + Coord::Cart); + writer.defineFieldOutputs(SimEngine::SRPIC, { "E" }); + + std::vector addresses; + for (auto i = 0; i < 3; ++i) { + field_names.push_back(writer.fieldWriters()[0].name(i)); + addresses.push_back(i); + } + writer.beginWriting(0, 0.0); + writer.writeField(field_names, field, addresses); + writer.endWriting(); + + writer.beginWriting(1, 0.1); + writer.writeField(field_names, field, addresses); + writer.endWriting(); + adios.ExitComputationBlock(); } - writer.beginWriting(0, 0.0); - writer.writeField(names, field, addresses); - writer.endWriting(); - writer.beginWriting(1, 0.1); - writer.writeField(names, field, addresses); - writer.endWriting(); + { + // read + adios2::IO io = adios.DeclareIO("read-test"); + io.SetEngine("hdf5"); + adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read, MPI_COMM_SELF); + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 1, + "Dimension is not correct", + HERE); + for (std::size_t step = 0; reader.BeginStep() == adios2::StepStatus::OK; + ++step) { + std::size_t step_read; + long double time_read; + + reader.Get(io.InquireVariable("Step"), step_read); + reader.Get(io.InquireVariable("Time"), 
time_read); + raise::ErrorIf(step_read != step, "Step is not correct", HERE); + raise::ErrorIf((float)time_read != (float)step * 0.1f, + "Time is not correct", + HERE); + + const auto l_size = nx1; + const auto l_offset = nx1 * mpi_rank; + const auto g_size = nx1 * mpi_size; + + const double n = l_size; + const double d = dwn1; + const double l = l_offset; + const double f = math::ceil(l / d) * d - l; + + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); + const auto l_corner_dwn = static_cast(math::ceil(l / d)); + + array_t field_read {}; + int cntr = 0; + for (const auto& name : field_names) { + auto fieldVar = io.InquireVariable(name); + if (fieldVar) { + raise::ErrorIf(fieldVar.Shape().size() != 1, + fmt::format("%s is not 1D", name.c_str()), + HERE); + auto dims = fieldVar.Shape(); + std::size_t nx1_r = dims[0]; + raise::ErrorIf((nx1_r != CEILDIV(nx1 * mpi_size, dwn1)), + fmt::format("%s = %ld is not %d", + name.c_str(), + nx1_r, + CEILDIV(nx1 * mpi_size, dwn1)), + HERE); + + fieldVar.SetSelection( + adios2::Box({ l_corner_dwn }, { l_size_dwn })); + field_read = array_t(name, l_size_dwn); + auto field_read_h = Kokkos::create_mirror_view(field_read); + reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(field_read, field_read_h); + + Kokkos::parallel_for( + "check", + CreateRangePolicy({ 0 }, { l_size_dwn }), + Lambda(index_t i1) { + if (not cmp::AlmostEqual( + field_read(i1), + field(i1 * dwn1 + first_cell + i1min, cntr))) { + printf("\n:::::::::::::::\nfield_read(%ld) = %f != " + "field(%ld, %d) = %f\n:::::::::::::::\n", + i1, + field_read(i1), + i1 * dwn1 + first_cell + i1min, + cntr, + field(i1 * dwn1 + first_cell + i1min, cntr)); + raise::KernelError(HERE, "Field is not read correctly"); + } + }); + } else { + raise::Error("Field not found", HERE); + } + ++cntr; + } + } + reader.Close(); + } } catch (std::exception& e) { std::cerr << e.what() << std::endl; @@ -82,3 +184,5 
@@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; } + +#undef CEILDIV diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 4c032094b..3fe42bf1b 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -3,7 +3,6 @@ #include "utils/formatting.h" -#include "output/fields.h" #include "output/writer.h" #include @@ -12,7 +11,6 @@ #include #include -#include #include #include @@ -24,6 +22,9 @@ void cleanup() { fs::remove(tempfile_path); } +#define CEILDIV(a, b) \ + (static_cast(math::ceil(static_cast(a) / static_cast(b)))) + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); @@ -131,16 +132,17 @@ auto main(int argc, char* argv[]) -> int { std::size_t nx1_r = dims[0]; std::size_t nx2_r = dims[1]; std::size_t nx3_r = dims[2]; - raise::ErrorIf((nx1_r != nx1 / dwn1) || (nx2_r != nx2 / dwn2) || - (nx3_r != nx3 / dwn3), + raise::ErrorIf((nx1_r != CEILDIV(nx1, dwn1)) || + (nx2_r != CEILDIV(nx2, dwn2)) || + (nx3_r != CEILDIV(nx3, dwn3)), fmt::format("%s = %ldx%ldx%ld is not %dx%dx%d", name.c_str(), nx1_r, nx2_r, nx3_r, - nx1 / dwn1, - nx2 / dwn2, - nx3 / dwn3), + CEILDIV(nx1, dwn1), + CEILDIV(nx2, dwn2), + CEILDIV(nx3, dwn3)), HERE); fieldVar.SetSelection( @@ -195,3 +197,5 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; } + +#undef CEILDIV diff --git a/src/output/writer.cpp b/src/output/writer.cpp index b35e3ab17..4ba0ea14c 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -78,17 +78,17 @@ namespace out { "Downsampling with ghosts not supported", HERE); - double g = glob_shape[i]; - double d = m_dwn[i]; - double l = loc_corner[i]; - double n = loc_shape[i]; - double f = math::ceil(l/d)*d-l; + const double g = glob_shape[i]; + const double d = m_dwn[i]; + const double l = loc_corner[i]; + const double n = loc_shape[i]; + const double f = math::ceil(l / d) * d - l; m_flds_g_shape_dwn.push_back(static_cast(math::ceil(g / 
d))); - m_flds_l_corner_dwn.push_back(static_cast(math::ceil(l /d))); + m_flds_l_corner_dwn.push_back(static_cast(math::ceil(l / d))); m_flds_l_first.push_back(static_cast(f)); - m_flds_l_shape_dwn.push_back(static_cast(math::ceil((n - f) / d))); + m_flds_l_shape_dwn.push_back( + static_cast(math::ceil((n - f) / d))); } - m_io.DefineAttribute("NGhosts", incl_ghosts ? N_GHOSTS : 0); m_io.DefineAttribute("Dimension", m_flds_g_shape.size()); @@ -222,14 +222,15 @@ namespace out { Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const double first_cell1_d = first_cell[0]; - const double nx1_full = field.extent(0) - 2 * N_GHOSTS; - const auto first_cell1 = first_cell[0]; + const auto dwn1 = dwn[0]; + const double first_cell1_d = first_cell[0]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; - const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); - - output_field = array_t { "output_field", nx1_dwn }; + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + + output_field = array_t { "output_field", nx1_dwn }; Kokkos::parallel_for( "outputField", nx1_dwn, @@ -247,17 +248,19 @@ namespace out { slice.extent(1) }; Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const auto dwn2 = dwn[1]; - const double first_cell1_d = first_cell[0]; - const double first_cell2_d = first_cell[1]; - const double nx1_full = field.extent(0) - 2 * N_GHOSTS; - const double nx2_full = field.extent(1) - 2 * N_GHOSTS; - const auto first_cell1 = first_cell[0]; - const auto first_cell2 = first_cell[1]; - - const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); - const auto nx2_dwn = static_cast(math::ceil((nx2_full - first_cell2_d) / dwn2)); + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double nx1_full 
= field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast( + math::ceil((nx2_full - first_cell2_d) / dwn2)); output_field = array_t { "output_field", nx1_dwn, nx2_dwn }; Kokkos::parallel_for( "outputField", @@ -280,24 +283,26 @@ namespace out { slice.extent(2) }; Kokkos::deep_copy(output_field, slice); } else { - const auto dwn1 = dwn[0]; - const auto dwn2 = dwn[1]; - const auto dwn3 = dwn[2]; - const double first_cell1_d = first_cell[0]; - const double first_cell2_d = first_cell[1]; - const double first_cell3_d = first_cell[2]; - const double nx1_full = field.extent(0) - 2 * N_GHOSTS; - const double nx2_full = field.extent(1) - 2 * N_GHOSTS; - const double nx3_full = field.extent(2) - 2 * N_GHOSTS; - const auto first_cell1 = first_cell[0]; - const auto first_cell2 = first_cell[1]; - const auto first_cell3 = first_cell[2]; - - const auto nx1_dwn = static_cast(math::ceil((nx1_full - first_cell1_d) / dwn1)); - const auto nx2_dwn = static_cast(math::ceil((nx2_full - first_cell2_d) / dwn2)); - const auto nx3_dwn = static_cast(math::ceil((nx3_full - first_cell3_d) / dwn3)); - - + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const auto dwn3 = dwn[2]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double first_cell3_d = first_cell[2]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; + const double nx3_full = field.extent(2) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + const auto first_cell3 = first_cell[2]; + + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast( + math::ceil((nx2_full 
- first_cell2_d) / dwn2)); + const auto nx3_dwn = static_cast( + math::ceil((nx3_full - first_cell3_d) / dwn3)); + output_field = array_t { "output_field", nx1_dwn, nx2_dwn, nx3_dwn }; Kokkos::parallel_for( "outputField", From af5ac780f96043ce277b1e2df0c76d771ade6e93 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 14:22:13 -0500 Subject: [PATCH 026/124] cmake formatting --- CMakeLists.txt | 67 ++++++------ cmake/MPIConfig.cmake | 3 +- cmake/adios2Config.cmake | 24 +++-- cmake/config.cmake | 31 ++++-- cmake/defaults.cmake | 76 ++++++++++---- cmake/dependencies.cmake | 106 ++++++++++++------- cmake/kokkosConfig.cmake | 52 +++++++--- cmake/report.cmake | 155 +++++++++++++++++----------- cmake/styling.cmake | 27 +++-- cmake/tests.cmake | 37 ++++--- setups/CMakeLists.txt | 28 ++--- src/CMakeLists.txt | 35 ++++--- src/archetypes/CMakeLists.txt | 20 ++-- src/archetypes/tests/CMakeLists.txt | 8 +- src/checkpoint/CMakeLists.txt | 33 +++--- src/checkpoint/tests/CMakeLists.txt | 12 ++- src/engines/CMakeLists.txt | 58 ++++++----- src/framework/CMakeLists.txt | 71 +++++++------ src/framework/tests/CMakeLists.txt | 50 +++++---- src/global/CMakeLists.txt | 48 ++++----- src/global/tests/CMakeLists.txt | 12 ++- src/kernels/CMakeLists.txt | 20 ++-- src/kernels/tests/CMakeLists.txt | 8 +- src/metrics/CMakeLists.txt | 16 ++- src/metrics/tests/CMakeLists.txt | 13 ++- src/output/CMakeLists.txt | 37 ++++--- src/output/tests/CMakeLists.txt | 28 +++-- 27 files changed, 662 insertions(+), 413 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 62319559b..4ee00d1b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,12 +8,13 @@ project( VERSION 1.2.0 LANGUAGES CXX C) add_compile_options("-D ENTITY_VERSION=\"${PROJECT_VERSION}\"") -execute_process(COMMAND - bash -c "git diff --quiet src/ && echo $(git rev-parse HEAD) || echo $(git rev-parse HEAD)-mod" +execute_process( + COMMAND + bash -c + "git diff --quiet src/ && echo $(git rev-parse HEAD) || echo $(git rev-parse 
HEAD)-mod" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE -) + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "Git hash: ${GIT_HASH}") add_compile_options("-D ENTITY_GIT_HASH=\"${GIT_HASH}\"") @@ -25,56 +26,57 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/defaults.cmake) # defaults set(DEBUG - ${default_debug} - CACHE BOOL "Debug mode") + ${default_debug} + CACHE BOOL "Debug mode") set(precision - ${default_precision} - CACHE STRING "Precision") + ${default_precision} + CACHE STRING "Precision") set(pgen - ${default_pgen} - CACHE STRING "Problem generator") + ${default_pgen} + CACHE STRING "Problem generator") set(gui - ${default_gui} - CACHE BOOL "Use GUI [nttiny]") + ${default_gui} + CACHE BOOL "Use GUI [nttiny]") set(output - ${default_output} - CACHE BOOL "Enable output") + ${default_output} + CACHE BOOL "Enable output") set(mpi - ${default_mpi} - CACHE BOOL "Use MPI") + ${default_mpi} + CACHE BOOL "Use MPI") # -------------------------- Compilation settings -------------------------- # set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if(${DEBUG} STREQUAL "OFF") set(CMAKE_BUILD_TYPE - Release - CACHE STRING "CMake build type") + Release + CACHE STRING "CMake build type") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") else() set(CMAKE_BUILD_TYPE - Debug - CACHE STRING "CMake build type") + Debug + CACHE STRING "CMake build type") set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") + "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs") # options set(precisions - "single" "double" - CACHE STRING "Precisions") + "single" "double" + CACHE STRING "Precisions") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # set(BUILD_TESTING - 
OFF - CACHE BOOL "Build tests") + OFF + CACHE BOOL "Build tests") # ------------------------ Third-party dependencies ------------------------ # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/kokkosConfig.cmake) @@ -98,12 +100,12 @@ endif() if(${output}) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/adios2Config.cmake) find_or_fetch_dependency(adios2 FALSE) - if (NOT DEFINED ENV{HDF5_ROOT}) + if(NOT DEFINED ENV{HDF5_ROOT}) set(USE_CUSTOM_HDF5 OFF) - if (DEFINED ENV{CONDA_PREFIX}) + if(DEFINED ENV{CONDA_PREFIX}) execute_process(COMMAND bash -c "conda list | grep \"hdf5\" -q" - RESULT_VARIABLE HDF5_INSTALLED) - if (HDF5_INSTALLED EQUAL 0) + RESULT_VARIABLE HDF5_INSTALLED) + if(HDF5_INSTALLED EQUAL 0) set(HDF5_ROOT $ENV{CONDA_PREFIX}) else() set(USE_CUSTOM_HDF5 ON) @@ -111,8 +113,11 @@ if(${output}) else() set(USE_CUSTOM_HDF5 ON) endif() - if (USE_CUSTOM_HDF5) - message(FATAL_ERROR "HDF5_ROOT is not set. Please set it to the root of the HDF5 installation") + if(USE_CUSTOM_HDF5) + message( + FATAL_ERROR + "HDF5_ROOT is not set. 
Please set it to the root of the HDF5 installation" + ) endif() endif() find_package(HDF5 REQUIRED) diff --git a/cmake/MPIConfig.cmake b/cmake/MPIConfig.cmake index b426641ec..d1bfeaab2 100644 --- a/cmake/MPIConfig.cmake +++ b/cmake/MPIConfig.cmake @@ -1,3 +1,4 @@ find_package(MPI REQUIRED) include_directories(${MPI_CXX_INCLUDE_PATH}) -add_compile_options("-D MPI_ENABLED") \ No newline at end of file +add_compile_options("-D MPI_ENABLED") + diff --git a/cmake/adios2Config.cmake b/cmake/adios2Config.cmake index 16c0c30c7..5c480f3d8 100644 --- a/cmake/adios2Config.cmake +++ b/cmake/adios2Config.cmake @@ -1,15 +1,27 @@ # ----------------------------- Adios2 settings ---------------------------- # -set(ADIOS2_BUILD_EXAMPLES OFF CACHE BOOL "Build ADIOS2 examples") +set(ADIOS2_BUILD_EXAMPLES + OFF + CACHE BOOL "Build ADIOS2 examples") # Language support -set(ADIOS2_USE_Python OFF CACHE BOOL "Use Python for ADIOS2") -set(ADIOS2_USE_Fortran OFF CACHE BOOL "Use Fortran for ADIOS2") +set(ADIOS2_USE_Python + OFF + CACHE BOOL "Use Python for ADIOS2") +set(ADIOS2_USE_Fortran + OFF + CACHE BOOL "Use Fortran for ADIOS2") # Format/compression support -set(ADIOS2_USE_ZeroMQ OFF CACHE BOOL "Use ZeroMQ for ADIOS2") +set(ADIOS2_USE_ZeroMQ + OFF + CACHE BOOL "Use ZeroMQ for ADIOS2") -set(ADIOS2_USE_MPI ${mpi} CACHE BOOL "Use MPI for ADIOS2") +set(ADIOS2_USE_MPI + ${mpi} + CACHE BOOL "Use MPI for ADIOS2") -set(ADIOS2_USE_CUDA OFF CACHE BOOL "Use CUDA for ADIOS2") +set(ADIOS2_USE_CUDA + OFF + CACHE BOOL "Use CUDA for ADIOS2") add_compile_options("-D OUTPUT_ENABLED") diff --git a/cmake/config.cmake b/cmake/config.cmake index fa18a87eb..58dd467e9 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -3,7 +3,10 @@ function(set_precision precision_name) list(FIND precisions ${precision_name} PRECISION_FOUND) if(${PRECISION_FOUND} EQUAL -1) - message(FATAL_ERROR "Invalid precision: ${precision_name}\nValid options are: ${precisions}") + message( + FATAL_ERROR + "Invalid precision: 
${precision_name}\nValid options are: ${precisions}" + ) endif() if(${precision_name} STREQUAL "single") @@ -13,19 +16,31 @@ endfunction() # ---------------------------- Problem generator --------------------------- # function(set_problem_generator pgen_name) - file(GLOB_RECURSE PGENS "${CMAKE_CURRENT_SOURCE_DIR}/setups/**/pgen.hpp" "${CMAKE_CURRENT_SOURCE_DIR}/setups/pgen.hpp") + file(GLOB_RECURSE PGENS "${CMAKE_CURRENT_SOURCE_DIR}/setups/**/pgen.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/setups/pgen.hpp") foreach(PGEN ${PGENS}) get_filename_component(PGEN_NAME ${PGEN} DIRECTORY) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups/" "" PGEN_NAME ${PGEN_NAME}) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups" "" PGEN_NAME ${PGEN_NAME}) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups/" "" PGEN_NAME + ${PGEN_NAME}) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups" "" PGEN_NAME + ${PGEN_NAME}) list(APPEND PGEN_NAMES ${PGEN_NAME}) endforeach() list(FIND PGEN_NAMES ${pgen_name} PGEN_FOUND) if(NOT ${pgen_name} STREQUAL "." 
AND ${PGEN_FOUND} EQUAL -1) - message(FATAL_ERROR "Invalid problem generator: ${pgen_name}\nValid options are: ${PGEN_NAMES}") + message( + FATAL_ERROR + "Invalid problem generator: ${pgen_name}\nValid options are: ${PGEN_NAMES}" + ) endif() - set(PGEN ${pgen_name} PARENT_SCOPE) + set(PGEN + ${pgen_name} + PARENT_SCOPE) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/setups/${pgen_name}) - set(PGEN_FOUND TRUE PARENT_SCOPE) - set(problem_generators ${PGEN_NAMES} PARENT_SCOPE) + set(PGEN_FOUND + TRUE + PARENT_SCOPE) + set(problem_generators + ${PGEN_NAMES} + PARENT_SCOPE) endfunction() diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index f70120e0d..46b4609c5 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -1,62 +1,100 @@ # ----------------------------- Defaults ---------------------------------- # if(DEFINED ENV{Entity_ENABLE_DEBUG}) - set(default_debug $ENV{Entity_ENABLE_DEBUG} CACHE INTERNAL "Default flag for debug mode") + set(default_debug + $ENV{Entity_ENABLE_DEBUG} + CACHE INTERNAL "Default flag for debug mode") else() - set(default_debug OFF CACHE INTERNAL "Default flag for debug mode") + set(default_debug + OFF + CACHE INTERNAL "Default flag for debug mode") endif() set_property(CACHE default_debug PROPERTY TYPE BOOL) -set(default_engine "pic" CACHE INTERNAL "Default engine") -set(default_precision "single" CACHE INTERNAL "Default precision") -set(default_pgen "." CACHE INTERNAL "Default problem generator") -set(default_sr_metric "minkowski" CACHE INTERNAL "Default SR metric") -set(default_gr_metric "kerr_schild" CACHE INTERNAL "Default GR metric") +set(default_engine + "pic" + CACHE INTERNAL "Default engine") +set(default_precision + "single" + CACHE INTERNAL "Default precision") +set(default_pgen + "." 
+ CACHE INTERNAL "Default problem generator") +set(default_sr_metric + "minkowski" + CACHE INTERNAL "Default SR metric") +set(default_gr_metric + "kerr_schild" + CACHE INTERNAL "Default GR metric") if(DEFINED ENV{Entity_ENABLE_OUTPUT}) - set(default_output $ENV{Entity_ENABLE_OUTPUT} CACHE INTERNAL "Default flag for output") + set(default_output + $ENV{Entity_ENABLE_OUTPUT} + CACHE INTERNAL "Default flag for output") else() - set(default_output OFF CACHE INTERNAL "Default flag for output") + set(default_output + OFF + CACHE INTERNAL "Default flag for output") endif() set_property(CACHE default_output PROPERTY TYPE BOOL) if(DEFINED ENV{Entity_ENABLE_GUI}) - set(default_gui $ENV{Entity_ENABLE_GUI} CACHE INTERNAL "Default flag for GUI") + set(default_gui + $ENV{Entity_ENABLE_GUI} + CACHE INTERNAL "Default flag for GUI") else() - set(default_gui OFF CACHE INTERNAL "Default flag for GUI") + set(default_gui + OFF + CACHE INTERNAL "Default flag for GUI") endif() set_property(CACHE default_gui PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_CUDA}) - set(default_KOKKOS_ENABLE_CUDA $ENV{Kokkos_ENABLE_CUDA} CACHE INTERNAL "Default flag for CUDA") + set(default_KOKKOS_ENABLE_CUDA + $ENV{Kokkos_ENABLE_CUDA} + CACHE INTERNAL "Default flag for CUDA") else() - set(default_KOKKOS_ENABLE_CUDA OFF CACHE INTERNAL "Default flag for CUDA") + set(default_KOKKOS_ENABLE_CUDA + OFF + CACHE INTERNAL "Default flag for CUDA") endif() set_property(CACHE default_KOKKOS_ENABLE_CUDA PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_HIP}) - set(default_KOKKOS_ENABLE_HIP $ENV{Kokkos_ENABLE_HIP} CACHE INTERNAL "Default flag for HIP") + set(default_KOKKOS_ENABLE_HIP + $ENV{Kokkos_ENABLE_HIP} + CACHE INTERNAL "Default flag for HIP") else() - set(default_KOKKOS_ENABLE_HIP OFF CACHE INTERNAL "Default flag for HIP") + set(default_KOKKOS_ENABLE_HIP + OFF + CACHE INTERNAL "Default flag for HIP") endif() set_property(CACHE default_KOKKOS_ENABLE_HIP PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_OPENMP}) - 
set(default_KOKKOS_ENABLE_OPENMP $ENV{Kokkos_ENABLE_OPENMP} CACHE INTERNAL "Default flag for OpenMP") + set(default_KOKKOS_ENABLE_OPENMP + $ENV{Kokkos_ENABLE_OPENMP} + CACHE INTERNAL "Default flag for OpenMP") else() - set(default_KOKKOS_ENABLE_OPENMP OFF CACHE INTERNAL "Default flag for OpenMP") + set(default_KOKKOS_ENABLE_OPENMP + OFF + CACHE INTERNAL "Default flag for OpenMP") endif() set_property(CACHE default_KOKKOS_ENABLE_OPENMP PROPERTY TYPE BOOL) if(DEFINED ENV{Entity_ENABLE_MPI}) - set(default_mpi $ENV{Entity_ENABLE_MPI} CACHE INTERNAL "Default flag for MPI") + set(default_mpi + $ENV{Entity_ENABLE_MPI} + CACHE INTERNAL "Default flag for MPI") else() - set(default_mpi OFF CACHE INTERNAL "Default flag for MPI") + set(default_mpi + OFF + CACHE INTERNAL "Default flag for MPI") endif() set_property(CACHE default_mpi PROPERTY TYPE BOOL) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index b143befdf..06a3e6a1f 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -1,24 +1,34 @@ -set(Kokkos_REPOSITORY https://github.com/kokkos/kokkos.git CACHE STRING "Kokkos repository") -set(plog_REPOSITORY https://github.com/SergiusTheBest/plog.git CACHE STRING "plog repository") -set(toml11_REPOSITORY https://github.com/ToruNiina/toml11 CACHE STRING "toml11 repository") +set(Kokkos_REPOSITORY + https://github.com/kokkos/kokkos.git + CACHE STRING "Kokkos repository") +set(plog_REPOSITORY + https://github.com/SergiusTheBest/plog.git + CACHE STRING "plog repository") -# set (adios2_REPOSITORY https://github.com/ornladios/ADIOS2.git CACHE STRING "ADIOS2 repository") +# set (adios2_REPOSITORY https://github.com/ornladios/ADIOS2.git CACHE STRING +# "ADIOS2 repository") function(check_internet_connection) if(OFFLINE STREQUAL "ON") - set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE BOOL "Connection status") + set(FETCHCONTENT_FULLY_DISCONNECTED + ON + CACHE BOOL "Connection status") message(STATUS "${Blue}Offline mode.${ColorReset}") else() 
execute_process( COMMAND ping 8.8.8.8 -c 2 RESULT_VARIABLE NO_CONNECTION - OUTPUT_QUIET - ) + OUTPUT_QUIET) if(NO_CONNECTION GREATER 0) - set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE BOOL "Connection status") - message(STATUS "${Red}No internet connection. Fetching disabled.${ColorReset}") + set(FETCHCONTENT_FULLY_DISCONNECTED + ON + CACHE BOOL "Connection status") + message( + STATUS "${Red}No internet connection. Fetching disabled.${ColorReset}") else() - set(FETCHCONTENT_FULLY_DISCONNECTED OFF CACHE BOOL "Connection status") + set(FETCHCONTENT_FULLY_DISCONNECTED + OFF + CACHE BOOL "Connection status") message(STATUS "${Green}Internet connection established.${ColorReset}") endif() endif() @@ -30,66 +40,92 @@ function(find_or_fetch_dependency package_name header_only) endif() if(NOT ${package_name}_FOUND) - if(DEFINED ${package_name}_REPOSITORY AND NOT FETCHCONTENT_FULLY_DISCONNECTED) + if(DEFINED ${package_name}_REPOSITORY AND NOT + FETCHCONTENT_FULLY_DISCONNECTED) # fetching package - message(STATUS "${Blue}${package_name} not found. Fetching from ${${package_name}_REPOSITORY}${ColorReset}") + message( + STATUS + "${Blue}${package_name} not found. 
Fetching from ${${package_name}_REPOSITORY}${ColorReset}" + ) include(FetchContent) if(${package_name} STREQUAL "Kokkos") FetchContent_Declare( ${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY} - GIT_TAG 4.3.00 - ) + GIT_TAG 4.3.00) else() - FetchContent_Declare( - ${package_name} - GIT_REPOSITORY ${${package_name}_REPOSITORY} - ) + FetchContent_Declare(${package_name} + GIT_REPOSITORY ${${package_name}_REPOSITORY}) endif() FetchContent_MakeAvailable(${package_name}) set(lower_pckg_name ${package_name}) string(TOLOWER ${lower_pckg_name} lower_pckg_name) - set(${package_name}_SRC ${CMAKE_CURRENT_BINARY_DIR}/_deps/${lower_pckg_name}-src CACHE PATH "Path to ${package_name} src") - set(${package_name}_FETCHED TRUE CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_SRC + ${CMAKE_CURRENT_BINARY_DIR}/_deps/${lower_pckg_name}-src + CACHE PATH "Path to ${package_name} src") + set(${package_name}_FETCHED + TRUE + CACHE BOOL "Whether ${package_name} was fetched") message(STATUS "${Green}${package_name} fetched.${ColorReset}") else() # get as submodule - message(STATUS "${Yellow}${package_name} not found. Using as submodule.${ColorReset}") + message( + STATUS + "${Yellow}${package_name} not found. 
Using as submodule.${ColorReset}" + ) - set(${package_name}_FETCHED FALSE CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_FETCHED + FALSE + CACHE BOOL "Whether ${package_name} was fetched") if(NOT FETCHCONTENT_FULLY_DISCONNECTED) - message(STATUS "${GREEN}Updating ${package_name} submodule.${ColorReset}") + message( + STATUS "${GREEN}Updating ${package_name} submodule.${ColorReset}") execute_process( - COMMAND git submodule update --init --remote ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) + COMMAND git submodule update --init --remote + ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} extern/${package_name}) - set(${package_name}_SRC ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} CACHE PATH "Path to ${package_name} src") - set(${package_name}_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build/extern/${package_name} CACHE PATH "Path to ${package_name} build") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + extern/${package_name}) + set(${package_name}_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + CACHE PATH "Path to ${package_name} src") + set(${package_name}_BUILD_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/build/extern/${package_name} + CACHE PATH "Path to ${package_name} build") endif() else() message(STATUS "${Green}${package_name} found.${ColorReset}") - set(${package_name}_FETCHED FALSE CACHE BOOL "Whether ${package_name} was fetched") - set(${package_name}_VERSION ${${package_name}_VERSION} CACHE INTERNAL "${package_name} version") + set(${package_name}_FETCHED + FALSE + CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_VERSION + ${${package_name}_VERSION} + CACHE INTERNAL "${package_name} version") endif() if(${package_name} STREQUAL "adios2") if(NOT DEFINED adios2_VERSION OR adios2_VERSION STREQUAL "") 
- get_directory_property(adios2_VERSION DIRECTORY ${adios2_BUILD_DIR} DEFINITION ADIOS2_VERSION) - set(adios2_VERSION ${adios2_VERSION} CACHE INTERNAL "ADIOS2 version") + get_directory_property(adios2_VERSION DIRECTORY ${adios2_BUILD_DIR} + DEFINITION ADIOS2_VERSION) + set(adios2_VERSION + ${adios2_VERSION} + CACHE INTERNAL "ADIOS2 version") endif() endif() if(${package_name} STREQUAL "Kokkos") if(NOT DEFINED Kokkos_VERSION OR Kokkos_VERSION STREQUAL "") - get_directory_property(Kokkos_VERSION DIRECTORY ${Kokkos_SRC} DEFINITION Kokkos_VERSION) - set(Kokkos_VERSION ${Kokkos_VERSION} CACHE INTERNAL "Kokkos version") + get_directory_property(Kokkos_VERSION DIRECTORY ${Kokkos_SRC} DEFINITION + Kokkos_VERSION) + set(Kokkos_VERSION + ${Kokkos_VERSION} + CACHE INTERNAL "Kokkos version") endif() endif() endfunction() diff --git a/cmake/kokkosConfig.cmake b/cmake/kokkosConfig.cmake index 8928253ae..63c32622d 100644 --- a/cmake/kokkosConfig.cmake +++ b/cmake/kokkosConfig.cmake @@ -1,19 +1,41 @@ # ----------------------------- Kokkos settings ---------------------------- # if(${DEBUG} STREQUAL "OFF") - set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION ON CACHE BOOL "Kokkos aggressive vectorization") - set(Kokkos_ENABLE_COMPILER_WARNINGS OFF CACHE BOOL "Kokkos compiler warnings") - set(Kokkos_ENABLE_DEBUG OFF CACHE BOOL "Kokkos debug") - set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK OFF CACHE BOOL "Kokkos debug bounds check") + set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION + ON + CACHE BOOL "Kokkos aggressive vectorization") + set(Kokkos_ENABLE_COMPILER_WARNINGS + OFF + CACHE BOOL "Kokkos compiler warnings") + set(Kokkos_ENABLE_DEBUG + OFF + CACHE BOOL "Kokkos debug") + set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK + OFF + CACHE BOOL "Kokkos debug bounds check") else() - set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION OFF CACHE BOOL "Kokkos aggressive vectorization") - set(Kokkos_ENABLE_COMPILER_WARNINGS ON CACHE BOOL "Kokkos compiler warnings") - set(Kokkos_ENABLE_DEBUG ON CACHE BOOL "Kokkos debug") - 
set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE BOOL "Kokkos debug bounds check") + set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION + OFF + CACHE BOOL "Kokkos aggressive vectorization") + set(Kokkos_ENABLE_COMPILER_WARNINGS + ON + CACHE BOOL "Kokkos compiler warnings") + set(Kokkos_ENABLE_DEBUG + ON + CACHE BOOL "Kokkos debug") + set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK + ON + CACHE BOOL "Kokkos debug bounds check") endif() -set(Kokkos_ENABLE_HIP ${default_KOKKOS_ENABLE_HIP} CACHE BOOL "Enable HIP") -set(Kokkos_ENABLE_CUDA ${default_KOKKOS_ENABLE_CUDA} CACHE BOOL "Enable CUDA") -set(Kokkos_ENABLE_OPENMP ${default_KOKKOS_ENABLE_OPENMP} CACHE BOOL "Enable OpenMP") +set(Kokkos_ENABLE_HIP + ${default_KOKKOS_ENABLE_HIP} + CACHE BOOL "Enable HIP") +set(Kokkos_ENABLE_CUDA + ${default_KOKKOS_ENABLE_CUDA} + CACHE BOOL "Enable CUDA") +set(Kokkos_ENABLE_OPENMP + ${default_KOKKOS_ENABLE_OPENMP} + CACHE BOOL "Enable OpenMP") # set memory space if(${Kokkos_ENABLE_CUDA}) @@ -51,7 +73,11 @@ add_compile_options("-D HostExeSpace=${HOST_EXE_SPACE}") add_compile_options("-D HostMemSpace=${HOST_MEM_SPACE}") if(${BUILD_TESTING} STREQUAL "OFF") - set(Kokkos_ENABLE_TESTS OFF CACHE BOOL "Kokkos tests") + set(Kokkos_ENABLE_TESTS + OFF + CACHE BOOL "Kokkos tests") else() - set(Kokkos_ENABLE_TESTS ON CACHE BOOL "Kokkos tests") + set(Kokkos_ENABLE_TESTS + ON + CACHE BOOL "Kokkos tests") endif() diff --git a/cmake/report.cmake b/cmake/report.cmake index 6733dbcd4..13dde63f7 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -18,10 +18,22 @@ function(PadTo Text Padding Target Result) set(${rt} "${rt}") endif() - set(${Result} "${rt}" PARENT_SCOPE) + set(${Result} + "${rt}" + PARENT_SCOPE) endfunction() -function(PrintChoices Label Flag Choices Value Default Color OutputString Multiline Padding) +function( + PrintChoices + Label + Flag + Choices + Value + Default + Color + OutputString + Multiline + Padding) list(LENGTH "${Choices}" nchoices) set(rstring "") set(counter 0) @@ -35,14 +47,14 @@ 
function(PrintChoices Label Flag Choices Value Default Color OutputString Multil endif() set(rstring_i "${rstring_i}:") - PadTo("${rstring_i}" " " ${Padding} rstring_i) + padto("${rstring_i}" " " ${Padding} rstring_i) else() set(rstring_i "") if(NOT ${counter} EQUAL ${nchoices}) if(${Multiline} EQUAL 1) set(rstring_i "${rstring_i}\n") - PadTo("${rstring_i}" " " ${Padding} rstring_i) + padto("${rstring_i}" " " ${Padding} rstring_i) else() set(rstring_i "${rstring_i}/") endif() @@ -71,13 +83,16 @@ function(PrintChoices Label Flag Choices Value Default Color OutputString Multil set(rstring_i "") endforeach() - set(${OutputString} "${rstring}" PARENT_SCOPE) + set(${OutputString} + "${rstring}" + PARENT_SCOPE) endfunction() set(ON_OFF_VALUES "ON" "OFF") if(${PGEN_FOUND}) - PrintChoices("Problem generator" + printchoices( + "Problem generator" "pgen" "${problem_generators}" ${PGEN} @@ -85,11 +100,11 @@ if(${PGEN_FOUND}) "${Blue}" PGEN_REPORT 1 - 36 - ) + 36) endif() -PrintChoices("Precision" +printchoices( + "Precision" "precision" "${precisions}" ${precision} @@ -97,9 +112,9 @@ PrintChoices("Precision" "${Blue}" PRECISION_REPORT 1 - 36 -) -PrintChoices("Output" + 36) +printchoices( + "Output" "output" "${ON_OFF_VALUES}" ${output} @@ -107,9 +122,9 @@ PrintChoices("Output" "${Green}" OUTPUT_REPORT 0 - 36 -) -PrintChoices("GUI" + 36) +printchoices( + "GUI" "gui" "${ON_OFF_VALUES}" ${gui} @@ -117,9 +132,9 @@ PrintChoices("GUI" "${Green}" GUI_REPORT 0 - 36 -) -PrintChoices("MPI" + 36) +printchoices( + "MPI" "mpi" "${ON_OFF_VALUES}" ${mpi} @@ -127,9 +142,9 @@ PrintChoices("MPI" "${Green}" MPI_REPORT 0 - 42 -) -PrintChoices("Debug mode" + 42) +printchoices( + "Debug mode" "DEBUG" "${ON_OFF_VALUES}" ${DEBUG} @@ -137,10 +152,10 @@ PrintChoices("Debug mode" "${Green}" DEBUG_REPORT 0 - 42 -) + 42) -PrintChoices("CUDA" +printchoices( + "CUDA" "Kokkos_ENABLE_CUDA" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_CUDA} @@ -148,9 +163,9 @@ PrintChoices("CUDA" "${Green}" CUDA_REPORT 0 - 42 -) 
-PrintChoices("HIP" + 42) +printchoices( + "HIP" "Kokkos_ENABLE_HIP" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_HIP} @@ -158,9 +173,9 @@ PrintChoices("HIP" "${Green}" HIP_REPORT 0 - 42 -) -PrintChoices("OpenMP" + 42) +printchoices( + "OpenMP" "Kokkos_ENABLE_OPENMP" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_OPENMP} @@ -168,10 +183,10 @@ PrintChoices("OpenMP" "${Green}" OPENMP_REPORT 0 - 42 -) + 42) -PrintChoices("C++ compiler" +printchoices( + "C++ compiler" "CMAKE_CXX_COMPILER" "${CMAKE_CXX_COMPILER} v${CMAKE_CXX_COMPILER_VERSION}" "${CMAKE_CXX_COMPILER} v${CMAKE_CXX_COMPILER_VERSION}" @@ -179,10 +194,10 @@ PrintChoices("C++ compiler" "${ColorReset}" CXX_COMPILER_REPORT 0 - 42 -) + 42) -PrintChoices("C compiler" +printchoices( + "C compiler" "CMAKE_C_COMPILER" "${CMAKE_C_COMPILER} v${CMAKE_C_COMPILER_VERSION}" "${CMAKE_C_COMPILER} v${CMAKE_C_COMPILER_VERSION}" @@ -190,21 +205,24 @@ PrintChoices("C compiler" "${ColorReset}" C_COMPILER_REPORT 0 - 42 -) + 42) get_cmake_property(_variableNames VARIABLES) -foreach (_variableName ${_variableNames}) - string(REGEX MATCH "Kokkos_ARCH_*" _isMatched ${_variableName}) - if(_isMatched) - get_property(isSet CACHE ${_variableName} PROPERTY VALUE) - if(isSet STREQUAL "ON") - string(REGEX REPLACE "Kokkos_ARCH_" "" ARCH ${_variableName}) - break() - endif() +foreach(_variableName ${_variableNames}) + string(REGEX MATCH "Kokkos_ARCH_*" _isMatched ${_variableName}) + if(_isMatched) + get_property( + isSet + CACHE ${_variableName} + PROPERTY VALUE) + if(isSet STREQUAL "ON") + string(REGEX REPLACE "Kokkos_ARCH_" "" ARCH ${_variableName}) + break() endif() + endif() endforeach() -PrintChoices("Architecture" +printchoices( + "Architecture" "Kokkos_ARCH_*" "${ARCH}" "${ARCH}" @@ -212,8 +230,7 @@ PrintChoices("Architecture" "${ColorReset}" ARCH_REPORT 0 - 42 -) + 42) if(${Kokkos_ENABLE_CUDA}) if("${CMAKE_CUDA_COMPILER}" STREQUAL "") @@ -225,11 +242,15 @@ if(${Kokkos_ENABLE_CUDA}) string(STRIP ${CUDACOMP} CUDACOMP) message(STATUS "CUDA compiler: 
${CUDACOMP}") - execute_process(COMMAND bash -c "${CUDACOMP} --version | grep release | sed -e 's/.*release //' -e 's/,.*//'" + execute_process( + COMMAND + bash -c + "${CUDACOMP} --version | grep release | sed -e 's/.*release //' -e 's/,.*//'" OUTPUT_VARIABLE CUDACOMP_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - PrintChoices("CUDA compiler" + printchoices( + "CUDA compiler" "CMAKE_CUDA_COMPILER" "${CUDACOMP}" "${CUDACOMP}" @@ -237,28 +258,37 @@ if(${Kokkos_ENABLE_CUDA}) "${ColorReset}" CUDA_COMPILER_REPORT 0 - 42 - ) + 42) endif() -if (${Kokkos_ENABLE_HIP}) - execute_process(COMMAND bash -c "hipcc --version | grep HIP | cut -d ':' -f 2 | tr -d ' '" +if(${Kokkos_ENABLE_HIP}) + execute_process( + COMMAND bash -c "hipcc --version | grep HIP | cut -d ':' -f 2 | tr -d ' '" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) endif() set(DOT_SYMBOL "${ColorReset}.") -set(DOTTED_LINE_SYMBOL "${ColorReset}. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ") +set(DOTTED_LINE_SYMBOL + "${ColorReset}. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " +) -set(DASHED_LINE_SYMBOL "${ColorReset}....................................................................... ") +set(DASHED_LINE_SYMBOL + "${ColorReset}....................................................................... 
" +) if(NOT ${PROJECT_VERSION_TWEAK} EQUAL 0) - set(VERSION_SYMBOL "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}-rc${PROJECT_VERSION_TWEAK}") + set(VERSION_SYMBOL + "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}-rc${PROJECT_VERSION_TWEAK}" + ) else() - set(VERSION_SYMBOL "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} ") + set(VERSION_SYMBOL + "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} " + ) endif() -message("${Blue} __ __ +message( + "${Blue} __ __ /\\ \\__ __/\\ \\__ __ ___\\ \\ _\\/\\_\\ \\ _\\ __ __ / __ \\ / __ \\ \\ \\/\\/\\ \\ \\ \\/ /\\ \\/\\ \\ @@ -299,7 +329,7 @@ message(" ${DEBUG_REPORT}") message("${DASHED_LINE_SYMBOL}\nDependencies") -if (NOT "${CUDACOMP_VERSION}" STREQUAL "") +if(NOT "${CUDACOMP_VERSION}" STREQUAL "") message(" - CUDA:\tv${CUDACOMP_VERSION}") elseif(NOT "${ROCM_VERSION}" STREQUAL "") message(" - ROCm:\tv${ROCM_VERSION}") @@ -312,7 +342,8 @@ if(${HDF5_FOUND}) message(" - HDF5:\tv${HDF5_VERSION}") endif() -message("${DASHED_LINE_SYMBOL} +message( + "${DASHED_LINE_SYMBOL} Notes ${Dim}: Set flags with `cmake ... 
-D ${Magenta}${ColorReset}${Dim}=`, the ${Underline}default${ColorReset}${Dim} value : will be used unless the variable is explicitly set.${ColorReset} diff --git a/cmake/styling.cmake b/cmake/styling.cmake index fb9cfcc87..70c448fff 100644 --- a/cmake/styling.cmake +++ b/cmake/styling.cmake @@ -23,20 +23,17 @@ if(NOT WIN32) set(StrikeEnd "${Esc}[0m") endif() -# message("This is normal") -# message("${Red}This is Red${ColorReset}") -# message("${Green}This is Green${ColorReset}") -# message("${Yellow}This is Yellow${ColorReset}") -# message("${Blue}This is Blue${ColorReset}") -# message("${Magenta}This is Magenta${ColorReset}") -# message("${Cyan}This is Cyan${ColorReset}") -# message("${White}This is White${ColorReset}") -# message("${BoldRed}This is BoldRed${ColorReset}") -# message("${BoldGreen}This is BoldGreen${ColorReset}") -# message("${BoldYellow}This is BoldYellow${ColorReset}") -# message("${BoldBlue}This is BoldBlue${ColorReset}") +# message("This is normal") message("${Red}This is Red${ColorReset}") +# message("${Green}This is Green${ColorReset}") message("${Yellow}This is +# Yellow${ColorReset}") message("${Blue}This is Blue${ColorReset}") +# message("${Magenta}This is Magenta${ColorReset}") message("${Cyan}This is +# Cyan${ColorReset}") message("${White}This is White${ColorReset}") +# message("${BoldRed}This is BoldRed${ColorReset}") message("${BoldGreen}This is +# BoldGreen${ColorReset}") message("${BoldYellow}This is +# BoldYellow${ColorReset}") message("${BoldBlue}This is BoldBlue${ColorReset}") # message("${BoldMagenta}This is BoldMagenta${ColorReset}") -# message("${BoldCyan}This is BoldCyan${ColorReset}") -# message("${BoldWhite}This is BoldWhite\n\n${ColorReset}") +# message("${BoldCyan}This is BoldCyan${ColorReset}") message("${BoldWhite}This +# is BoldWhite\n\n${ColorReset}") + +# message() -# message() \ No newline at end of file diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 643ac3d29..7820a5192 100644 --- a/cmake/tests.cmake +++ 
b/cmake/tests.cmake @@ -8,27 +8,36 @@ add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) -if (${output}) +if(${output}) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() -if (${mpi}) +if(${mpi}) # tests with mpi - if (${output}) - add_subdirectory(${SRC_DIR}/output/tests ${CMAKE_CURRENT_BINARY_DIR}/output/tests) - add_subdirectory(${SRC_DIR}/checkpoint/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) + if(${output}) + add_subdirectory(${SRC_DIR}/output/tests + ${CMAKE_CURRENT_BINARY_DIR}/output/tests) + add_subdirectory(${SRC_DIR}/checkpoint/tests + ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) + add_subdirectory(${SRC_DIR}/framework/tests + ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) endif() else() # tests without mpi - add_subdirectory(${SRC_DIR}/global/tests ${CMAKE_CURRENT_BINARY_DIR}/global/tests) - add_subdirectory(${SRC_DIR}/metrics/tests ${CMAKE_CURRENT_BINARY_DIR}/metrics/tests) - add_subdirectory(${SRC_DIR}/kernels/tests ${CMAKE_CURRENT_BINARY_DIR}/kernels/tests) - add_subdirectory(${SRC_DIR}/archetypes/tests ${CMAKE_CURRENT_BINARY_DIR}/archetypes/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) - if (${output}) - add_subdirectory(${SRC_DIR}/output/tests ${CMAKE_CURRENT_BINARY_DIR}/output/tests) - add_subdirectory(${SRC_DIR}/checkpoint/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) + add_subdirectory(${SRC_DIR}/global/ ${CMAKE_CURRENT_BINARY_DIR}/global/tests) + add_subdirectory(${SRC_DIR}/metrics/tests + ${CMAKE_CURRENT_BINARY_DIR}/metrics/tests) + 
add_subdirectory(${SRC_DIR}/kernels/tests + ${CMAKE_CURRENT_BINARY_DIR}/kernels/tests) + add_subdirectory(${SRC_DIR}/archetypes/tests + ${CMAKE_CURRENT_BINARY_DIR}/archetypes/tests) + add_subdirectory(${SRC_DIR}/framework/tests + ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) + if(${output}) + add_subdirectory(${SRC_DIR}/output/tests + ${CMAKE_CURRENT_BINARY_DIR}/output/tests) + add_subdirectory(${SRC_DIR}/checkpoint/tests + ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) endif() endif() diff --git a/setups/CMakeLists.txt b/setups/CMakeLists.txt index b1753d7b8..c92c1d345 100644 --- a/setups/CMakeLists.txt +++ b/setups/CMakeLists.txt @@ -1,23 +1,25 @@ # ------------------------------ # @defines: ntt_pgen [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_pgen [required] +# +# * ntt_pgen [required] +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ add_library(ntt_pgen INTERFACE) -target_link_libraries(ntt_pgen INTERFACE - ntt_global - ntt_framework - ntt_archetypes - ntt_kernels -) +target_link_libraries(ntt_pgen INTERFACE ntt_global ntt_framework + ntt_archetypes ntt_kernels) target_include_directories(ntt_pgen - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/${PGEN} -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/${PGEN}) + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d75094c2b..a41b84900 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,27 +1,30 @@ # ------------------------------ # @defines: entity [STATIC/SHARED] +# # @sources: -# - entity.cpp +# +# * entity.cpp +# # @depends: -# - ntt_global [required] -# - ntt_framework [required] -# - ntt_metrics [required] -# - ntt_engine [required] -# - ntt_pgen [required] +# +# * ntt_global [required] +# * ntt_framework [required] +# * ntt_metrics [required] +# * ntt_engine [required] +# * ntt_pgen [required] +# # @uses: -# - kokkos 
[required] -# - plog [required] -# - toml11 [required] -# - ADIOS2 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * ADIOS2 [optional] +# * mpi [optional] # ------------------------------ - set(ENTITY ${PROJECT_NAME}.xc) set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/entity.cpp -) +set(SOURCES ${SRC_DIR}/entity.cpp) add_executable(${ENTITY} entity.cpp) # dependencies @@ -32,7 +35,7 @@ add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) add_subdirectory(${SRC_DIR}/engines ${CMAKE_CURRENT_BINARY_DIR}/engines) -if (${output}) +if(${output}) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() diff --git a/src/archetypes/CMakeLists.txt b/src/archetypes/CMakeLists.txt index 7883ba6a5..8e2f325af 100644 --- a/src/archetypes/CMakeLists.txt +++ b/src/archetypes/CMakeLists.txt @@ -1,13 +1,19 @@ # ------------------------------ # @defines: ntt_archetypes [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_kernels [required] +# +# * ntt_global [required] +# * ntt_kernels [required] +# # @uses: -# - kokkos [required] -# - mpi [optional] +# +# * kokkos [required] +# * mpi [optional] # ------------------------------ add_library(ntt_archetypes INTERFACE) @@ -17,5 +23,5 @@ add_dependencies(ntt_archetypes ${libs}) target_link_libraries(ntt_archetypes INTERFACE ${libs}) target_include_directories(ntt_archetypes - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/archetypes/tests/CMakeLists.txt b/src/archetypes/tests/CMakeLists.txt index 4ffc35322..694a6b4f9 100644 --- a/src/archetypes/tests/CMakeLists.txt +++ b/src/archetypes/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # 
------------------------------ # @brief: Generates tests for the `ntt_archetypes` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/checkpoint/CMakeLists.txt b/src/checkpoint/CMakeLists.txt index d97bd4a34..fa641bfb5 100644 --- a/src/checkpoint/CMakeLists.txt +++ b/src/checkpoint/CMakeLists.txt @@ -1,23 +1,28 @@ # ------------------------------ # @defines: ntt_checkpoint [STATIC/SHARED] +# # @sources: -# - writer.cpp -# - reader.cpp +# +# * writer.cpp +# * reader.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - ADIOS2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * ADIOS2 [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/writer.cpp - ${SRC_DIR}/reader.cpp -) +set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/reader.cpp) add_library(ntt_checkpoint ${SOURCES}) set(libs ntt_global) @@ -25,7 +30,7 @@ add_dependencies(ntt_checkpoint ${libs}) target_link_libraries(ntt_checkpoint PUBLIC ${libs}) target_link_libraries(ntt_checkpoint PRIVATE stdc++fs) -target_include_directories(ntt_checkpoint +target_include_directories( + ntt_checkpoint PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt index 3d7475a52..10836554b 100644 --- a/src/checkpoint/tests/CMakeLists.txt +++ b/src/checkpoint/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_checkpoint` module +# # @uses: -# - kokkos [required] -# - adios2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * adios2 
[required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -13,14 +15,14 @@ function(gen_test title) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_checkpoint ntt_global) + set(libs ntt_checkpoint ntt_global) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) add_test(NAME "CHECKPOINT::${title}" COMMAND "${exec}") endfunction() -if (NOT ${mpi}) +if(NOT ${mpi}) gen_test(checkpoint-nompi) else() # gen_test(checkpoint-mpi) diff --git a/src/engines/CMakeLists.txt b/src/engines/CMakeLists.txt index 2ab7289b2..6da2f4efd 100644 --- a/src/engines/CMakeLists.txt +++ b/src/engines/CMakeLists.txt @@ -1,37 +1,43 @@ # ------------------------------ # @defines: ntt_engines [STATIC/SHARED] +# # @sources: -# - engine_printer.cpp -# - engine_init.cpp -# - engine_run.cpp +# +# * engine_printer.cpp +# * engine_init.cpp +# * engine_run.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_framework [required] -# - ntt_metrics [required] -# - ntt_kernels [required] -# - ntt_archetypes [required] -# - ntt_pgen [required] -# - ntt_output [optional] +# +# * ntt_global [required] +# * ntt_framework [required] +# * ntt_metrics [required] +# * ntt_kernels [required] +# * ntt_archetypes [required] +# * ntt_pgen [required] +# * ntt_output [optional] +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - adios2 [optional] -# - hdf5 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * adios2 [optional] +# * hdf5 [optional] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/engine_printer.cpp - ${SRC_DIR}/engine_init.cpp - ${SRC_DIR}/engine_run.cpp -) +set(SOURCES ${SRC_DIR}/engine_printer.cpp ${SRC_DIR}/engine_init.cpp + ${SRC_DIR}/engine_run.cpp) add_library(ntt_engines ${SOURCES}) -set(libs 
ntt_global ntt_framework ntt_metrics ntt_archetypes ntt_kernels ntt_pgen) +set(libs ntt_global ntt_framework ntt_metrics ntt_archetypes ntt_kernels + ntt_pgen) if(${output}) list(APPEND libs ntt_output hdf5::hdf5) endif() @@ -39,7 +45,7 @@ add_dependencies(ntt_engines ${libs}) target_link_libraries(ntt_engines PUBLIC ${libs}) target_compile_definitions(ntt_engines PRIVATE PGEN=\"${PGEN}\") -target_include_directories(ntt_engines +target_include_directories( + ntt_engines PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index e01759d14..8802f696b 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -1,41 +1,48 @@ # ------------------------------ # @defines: ntt_framework [STATIC/SHARED] +# # @sources: -# - parameters.cpp -# - simulation.cpp -# - domain/grid.cpp -# - domain/metadomain.cpp -# - domain/communications.cpp -# - domain/checkpoint.cpp -# - containers/particles.cpp -# - containers/fields.cpp -# - domain/output.cpp +# +# * parameters.cpp +# * simulation.cpp +# * domain/grid.cpp +# * domain/metadomain.cpp +# * domain/communications.cpp +# * domain/checkpoint.cpp +# * containers/particles.cpp +# * containers/fields.cpp +# * domain/output.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_metrics [required] -# - ntt_kernels [required] -# - ntt_output [optional] +# +# * ntt_global [required] +# * ntt_metrics [required] +# * ntt_kernels [required] +# * ntt_output [optional] +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - ADIOS2 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * ADIOS2 [optional] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/parameters.cpp - 
${SRC_DIR}/simulation.cpp - ${SRC_DIR}/domain/grid.cpp - ${SRC_DIR}/domain/metadomain.cpp - ${SRC_DIR}/domain/communications.cpp - ${SRC_DIR}/containers/particles.cpp - ${SRC_DIR}/containers/fields.cpp -) -if (${output}) +set(SOURCES + ${SRC_DIR}/parameters.cpp + ${SRC_DIR}/simulation.cpp + ${SRC_DIR}/domain/grid.cpp + ${SRC_DIR}/domain/metadomain.cpp + ${SRC_DIR}/domain/communications.cpp + ${SRC_DIR}/containers/particles.cpp + ${SRC_DIR}/containers/fields.cpp) +if(${output}) list(APPEND SOURCES ${SRC_DIR}/domain/output.cpp) list(APPEND SOURCES ${SRC_DIR}/domain/checkpoint.cpp) endif() @@ -50,7 +57,7 @@ add_dependencies(ntt_framework ${libs}) target_link_libraries(ntt_framework PUBLIC ${libs}) target_link_libraries(ntt_framework PRIVATE stdc++fs) -target_include_directories(ntt_framework +target_include_directories( + ntt_framework PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/framework/tests/CMakeLists.txt b/src/framework/tests/CMakeLists.txt index 56ad0783b..ce188e9f1 100644 --- a/src/framework/tests/CMakeLists.txt +++ b/src/framework/tests/CMakeLists.txt @@ -1,19 +1,23 @@ # ------------------------------ # @brief: Generates tests for the `ntt_framework` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - mpi [optional] -# - adios2 [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * mpi [optional] +# * adios2 [optional] +# # !TODO: -# - add tests for mesh separately -# - add test for 3D metadomain +# +# * add tests for mesh separately +# * add test for 3D metadomain # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) -function(gen_test title) +function(gen_test title is_parallel) set(exec test-framework-${title}.xc) set(src ${title}.cpp) add_executable(${exec} ${src}) @@ -22,24 +26,30 @@ function(gen_test title) add_dependencies(${exec} ${libs}) 
target_link_libraries(${exec} PRIVATE ${libs}) - add_test(NAME "FRAMEWORK::${title}" COMMAND "${exec}") + if(${is_parallel}) + add_test(NAME "FRAMEWORK::${title}" + COMMAND "${MPIEXEC_EXECUTABLE}" "${MPIEXEC_NUMPROC_FLAG}" "4" + "${exec}") + else() + add_test(NAME "FRAMEWORK::${title}" COMMAND "${exec}") + endif() endfunction() -if (${mpi}) - gen_test(comm_mpi) +if(${mpi}) + gen_test(comm_mpi true) else() - gen_test(parameters) - gen_test(particles) - gen_test(fields) - gen_test(grid_mesh) - if (${DEBUG}) - gen_test(metadomain) + gen_test(parameters false) + gen_test(particles false) + gen_test(fields false) + gen_test(grid_mesh false) + if(${DEBUG}) + gen_test(metadomain false) endif() - gen_test(comm_nompi) + gen_test(comm_nompi false) endif() - # this test is only run manually to ensure ... # ... command line args are working properly ... # ... and that the logging is done correctly -# gen_test(simulation) +# +# gen_test(simulation) diff --git a/src/global/CMakeLists.txt b/src/global/CMakeLists.txt index 334ce078d..97946f059 100644 --- a/src/global/CMakeLists.txt +++ b/src/global/CMakeLists.txt @@ -1,36 +1,38 @@ # ------------------------------ # @defines: ntt_global [STATIC/SHARED] +# # @sources: -# - global.cpp -# - arch/kokkos_aliases.cpp -# - utils/cargs.cpp -# - utils/param_container.cpp -# - utils/timer.cpp -# - utils/diag.cpp -# - utils/progressbar.cpp +# +# * global.cpp +# * arch/kokkos_aliases.cpp +# * utils/cargs.cpp +# * utils/param_container.cpp +# * utils/timer.cpp +# * utils/diag.cpp +# * utils/progressbar.cpp +# # @includes: -# - ./ +# +# * ./ +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/global.cpp - ${SRC_DIR}/arch/kokkos_aliases.cpp - ${SRC_DIR}/utils/cargs.cpp - ${SRC_DIR}/utils/timer.cpp - ${SRC_DIR}/utils/diag.cpp - ${SRC_DIR}/utils/progressbar.cpp 
-) -if (${output}) +set(SOURCES + ${SRC_DIR}/global.cpp ${SRC_DIR}/arch/kokkos_aliases.cpp + ${SRC_DIR}/utils/cargs.cpp ${SRC_DIR}/utils/timer.cpp + ${SRC_DIR}/utils/diag.cpp ${SRC_DIR}/utils/progressbar.cpp) +if(${output}) list(APPEND SOURCES ${SRC_DIR}/utils/param_container.cpp) endif() add_library(ntt_global ${SOURCES}) -target_include_directories(ntt_global +target_include_directories( + ntt_global PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(ntt_global PRIVATE stdc++fs) diff --git a/src/global/tests/CMakeLists.txt b/src/global/tests/CMakeLists.txt index e9e5de687..e30da20a0 100644 --- a/src/global/tests/CMakeLists.txt +++ b/src/global/tests/CMakeLists.txt @@ -1,11 +1,15 @@ # ------------------------------ # @brief: Generates tests for the `ntt_global` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] +# # !TODO: -# - add optional tests for the `mpi_aliases.h` +# +# * add optional tests for the `mpi_aliases.h` # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/kernels/CMakeLists.txt b/src/kernels/CMakeLists.txt index d24dff0a4..c8a1f409f 100644 --- a/src/kernels/CMakeLists.txt +++ b/src/kernels/CMakeLists.txt @@ -1,13 +1,19 @@ # ------------------------------ # @defines: ntt_kernels [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ add_library(ntt_kernels INTERFACE) @@ -17,5 +23,5 @@ add_dependencies(ntt_kernels ${libs}) target_link_libraries(ntt_kernels INTERFACE ${libs}) target_include_directories(ntt_kernels - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of 
file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/kernels/tests/CMakeLists.txt b/src/kernels/tests/CMakeLists.txt index e55dbc111..10e8bb944 100644 --- a/src/kernels/tests/CMakeLists.txt +++ b/src/kernels/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_kernels` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/metrics/CMakeLists.txt b/src/metrics/CMakeLists.txt index 0f303fcfc..e053bb61c 100644 --- a/src/metrics/CMakeLists.txt +++ b/src/metrics/CMakeLists.txt @@ -1,11 +1,17 @@ # ------------------------------ # @defines: ntt_metrics [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] +# +# * kokkos [required] # ------------------------------ add_library(ntt_metrics INTERFACE) @@ -15,5 +21,5 @@ add_dependencies(ntt_metrics ${libs}) target_link_libraries(ntt_metrics INTERFACE ${libs}) target_include_directories(ntt_metrics - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/metrics/tests/CMakeLists.txt b/src/metrics/tests/CMakeLists.txt index 117cb3295..c997ab079 100644 --- a/src/metrics/tests/CMakeLists.txt +++ b/src/metrics/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_metrics` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -13,7 +15,7 @@ function(gen_test title) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_metrics) + set(libs 
ntt_metrics) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs}) @@ -25,4 +27,5 @@ gen_test(vec_trans) gen_test(coord_trans) gen_test(sph-qsph) gen_test(ks-qks) -gen_test(sr-cart-sph) \ No newline at end of file +gen_test(sr-cart-sph) + diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index 2c25631ec..e6dbcc03a 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -1,32 +1,37 @@ # ------------------------------ # @defines: ntt_output [STATIC/SHARED] +# # @sources: -# - writer.cpp -# - fields.cpp -# - utils/interpret_prompt.cpp +# +# * writer.cpp +# * fields.cpp +# * utils/interpret_prompt.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - ADIOS2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * ADIOS2 [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/writer.cpp - ${SRC_DIR}/fields.cpp - ${SRC_DIR}/utils/interpret_prompt.cpp -) +set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/fields.cpp + ${SRC_DIR}/utils/interpret_prompt.cpp) add_library(ntt_output ${SOURCES}) set(libs ntt_global) add_dependencies(ntt_output ${libs}) target_link_libraries(ntt_output PUBLIC ${libs}) -target_include_directories(ntt_output +target_include_directories( + ntt_output PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index 37af95fac..afc7950c4 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -1,28 +1,36 @@ # ------------------------------ # @brief: Generates tests for the `ntt_output` module +# # @uses: -# - kokkos [required] -# - mpi [optional] -# - adios2 [optional] +# +# * kokkos [required] +# * mpi [optional] +# * adios2 [optional] # 
------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) -function(gen_test title) +function(gen_test title is_parallel) set(exec test-output-${title}.xc) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_output ntt_global ntt_metrics ntt_framework) + set(libs ntt_output ntt_global ntt_metrics ntt_framework) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) - add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") + if(${is_parallel}) + add_test(NAME "OUTPUT::${title}" + COMMAND "${MPIEXEC_EXECUTABLE}" "${MPIEXEC_NUMPROC_FLAG}" "4" + "${exec}") + else() + add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") + endif() endfunction() -if (NOT ${mpi}) - gen_test(fields) - gen_test(writer-nompi) +if(NOT ${mpi}) + gen_test(fields false) + gen_test(writer-nompi false) else() - gen_test(writer-mpi) + gen_test(writer-mpi true) endif() From 8a6321b5309cce50bbe9b56c83736645fd24807c Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 14:30:45 -0500 Subject: [PATCH 027/124] proper gh action (hopefully) --- .github/workflows/actions.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 22f55d8be..3e0de8808 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -15,10 +15,9 @@ jobs: id: check_message run: | if git log -1 --pretty=%B | grep -q "RUNTEST"; then - echo "::set-output name=run_tests::true" + echo "run_tests=true" >> "$GITHUB_OUTPUT" else - echo "::set-output name=run_tests::false" - exit 1 + echo "run_tests=false" >> "$GITHUB_OUTPUT" fi tests: needs: check-commit From 50c7b9be966dd83183038fd14840fee2246f289f Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 14:36:10 -0500 Subject: [PATCH 028/124] readme (RUNTEST) --- README.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d6f4597f5..26ca92072 100644 
--- a/README.md +++ b/README.md @@ -10,21 +10,23 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) -## Core developers (alphabetical) +## Lead developers -👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} +☕ __Hayk Hakobyan__ {[@haykh](https://github.com/haykh)} -💁‍♂️ __Alexander Chernoglazov__ {[@SChernoglazov](https://github.com/SChernoglazov): PIC} +🥔 __Jens Mahlmann__ {[@jmahlmann](https://github.com/jmahlmann)} -🍵 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} +💁‍♂️ __Alexander Chernoglazov__ {[@SChernoglazov](https://github.com/SChernoglazov)} -🧋 __Alisa Galishnikova__ {[@alisagk](https://github.com/alisagk): GRPIC} +🧋 __Alisa Galishnikova__ {[@alisagk](https://github.com/alisagk)} -☕ __Hayk Hakobyan__ {[@haykh](https://github.com/haykh): framework, PIC, GRPIC, cubed-sphere} +🐬 __Sasha Philippov__ {[@sashaph](https://github.com/sashaph)} -🥔 __Jens Mahlmann__ {[@jmahlmann](https://github.com/jmahlmann): framework, MPI, cubed-sphere} +## Contributors (alphabetical) -🐬 __Sasha Philippov__ {[@sashaph](https://github.com/sashaph): all-around} +👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} + +🍵 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} 🤷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} From 6cd75c105b19f64ee317daab60125ff4ea44ce30 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 14:43:30 -0500 Subject: [PATCH 029/124] minor issue in cmake (RUNTEST) --- cmake/tests.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 7820a5192..ca8ee69c4 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -25,7 +25,8 @@
if(${mpi}) endif() else() # tests without mpi - add_subdirectory(${SRC_DIR}/global/ ${CMAKE_CURRENT_BINARY_DIR}/global/tests) + add_subdirectory(${SRC_DIR}/global/tests + ${CMAKE_CURRENT_BINARY_DIR}/global/tests) add_subdirectory(${SRC_DIR}/metrics/tests ${CMAKE_CURRENT_BINARY_DIR}/metrics/tests) add_subdirectory(${SRC_DIR}/kernels/tests From 727eb1eff09315129acb0e72e81a2d1969554faa Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 14:54:43 -0500 Subject: [PATCH 030/124] test fixed (RUNTEST) --- src/metrics/tests/sr-cart-sph.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index ec2f6ddc0..42aa5d639 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -123,30 +123,30 @@ auto main(int argc, char* argv[]) -> int { const auto res2d = std::vector { 64, 32 }; const auto res3d = std::vector { 64, 32, 16 }; const auto ext1dcart = boundaries_t { - {10.0, 20.0} + { 10.0, 20.0 } }; const auto ext2dcart = boundaries_t { - {0.0, 20.0}, - {0.0, 10.0} + { 0.0, 20.0 }, + { 0.0, 10.0 } }; const auto ext3dcart = boundaries_t { - {-2.0, 2.0}, - {-1.0, 1.0}, - {-0.5, 0.5} + { -2.0, 2.0 }, + { -1.0, 1.0 }, + { -0.5, 0.5 } }; const auto extsph = boundaries_t { - {1.0, 10.0}, - {0.0, constant::PI} + { 1.0, 10.0 }, + { 0.0, constant::PI } }; const auto params = std::map { - {"r0", -ONE}, - { "h", (real_t)0.25} + { "r0", -ONE }, + { "h", (real_t)0.25 } }; testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); testMetric>(res3d, ext3dcart, 500); - testMetric>(res2d, extsph, 10); + testMetric>(res2d, extsph, 100); testMetric>(res2d, extsph, 200, params); } catch (std::exception& e) { From 6bdbeb93806f21a8ca8eede8a70312a988f4dc4c Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 16:56:12 -0500 Subject: [PATCH 031/124] issues in writer tests (RUNTEST) --- .github/workflows/actions.yml | 2 +- 
dev/runners/Dockerfile.runner.cpu | 6 +++--- src/output/tests/writer-mpi.cpp | 4 ++-- src/output/tests/writer-nompi.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 3e0de8808..f60ee9061 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -48,7 +48,7 @@ jobs: elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" elif [ "${{ matrix.device }}" = "cpu" ]; then - FLAGS="" + FLAGS="-D mpi=ON" fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile diff --git a/dev/runners/Dockerfile.runner.cpu b/dev/runners/Dockerfile.runner.cpu index fc13ec9b5..3c2cf4926 100644 --- a/dev/runners/Dockerfile.runner.cpu +++ b/dev/runners/Dockerfile.runner.cpu @@ -7,14 +7,14 @@ RUN apt-get update && apt-get upgrade -y # cmake & build tools RUN apt-get remove -y --purge cmake && \ - apt-get install -y sudo wget curl build-essential && \ + apt-get install -y sudo wget curl build-essential openmpi-bin openmpi-common libopenmpi-dev && \ wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ rm /opt/cmake-3.29.6-linux-x86_64.tar.gz ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH # adios2 -RUN apt-get update && apt-get install -y git libhdf5-dev && \ +RUN apt-get update && apt-get install -y git libhdf5-openmpi-dev && \ git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ cd /opt/adios2-src && \ cmake -B build \ @@ -28,7 +28,7 @@ RUN apt-get update && apt-get install -y git libhdf5-dev && \ -D ADIOS2_USE_ZeroMQ=OFF \ -D BUILD_TESTING=OFF \ -D ADIOS2_BUILD_EXAMPLES=OFF \ - -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_USE_MPI=ON \ -D ADIOS2_HAVE_HDF5_VOL=OFF \ -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ cmake --build build -j && \ diff --git 
a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index c2729f658..5a5ae8007 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -100,8 +100,8 @@ auto main(int argc, char* argv[]) -> int { std::size_t step_read; long double time_read; - reader.Get(io.InquireVariable("Step"), step_read); - reader.Get(io.InquireVariable("Time"), time_read); + reader.Get(io.InquireVariable("Step"), &step_read); + reader.Get(io.InquireVariable("Time"), &time_read); raise::ErrorIf(step_read != step, "Step is not correct", HERE); raise::ErrorIf((float)time_read != (float)step * 0.1f, "Time is not correct", diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 3fe42bf1b..803f907e8 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -111,8 +111,8 @@ auto main(int argc, char* argv[]) -> int { std::size_t step_read; long double time_read; - reader.Get(io.InquireVariable("Step"), step_read); - reader.Get(io.InquireVariable("Time"), time_read); + reader.Get(io.InquireVariable("Step"), &step_read); + reader.Get(io.InquireVariable("Time"), &time_read); raise::ErrorIf(step_read != (step + 1) * 10, "Step is not correct", HERE); raise::ErrorIf((float)time_read != 123 + (float)step * 0.4f, "Time is not correct", From cdd3e8717735c43a34ecae4227886693a0ae69a1 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 17:10:28 -0500 Subject: [PATCH 032/124] writer test -> Sync (RUNTEST) --- src/output/tests/writer-mpi.cpp | 8 ++++++-- src/output/tests/writer-nompi.cpp | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 5a5ae8007..c6f5e5a09 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -100,8 +100,12 @@ auto main(int argc, char* argv[]) -> int { std::size_t step_read; long double time_read; - reader.Get(io.InquireVariable("Step"), &step_read); - 
reader.Get(io.InquireVariable("Time"), &time_read); + reader.Get(io.InquireVariable("Step"), + &step_read, + adios2::Mode::Sync); + reader.Get(io.InquireVariable("Time"), + &time_read, + adios2::Mode::Sync); raise::ErrorIf(step_read != step, "Step is not correct", HERE); raise::ErrorIf((float)time_read != (float)step * 0.1f, "Time is not correct", diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 803f907e8..ee93202b9 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -111,8 +111,12 @@ auto main(int argc, char* argv[]) -> int { std::size_t step_read; long double time_read; - reader.Get(io.InquireVariable("Step"), &step_read); - reader.Get(io.InquireVariable("Time"), &time_read); + reader.Get(io.InquireVariable("Step"), + &step_read, + adios2::Mode::Sync); + reader.Get(io.InquireVariable("Time"), + &time_read, + adios2::Mode::Sync); raise::ErrorIf(step_read != (step + 1) * 10, "Step is not correct", HERE); raise::ErrorIf((float)time_read != 123 + (float)step * 0.4f, "Time is not correct", From 3c793cdd31c04035be3a20106a98a7d49a77be53 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 Nov 2024 17:19:06 -0500 Subject: [PATCH 033/124] added flushall to tests (RUNTEST) --- src/output/tests/writer-mpi.cpp | 2 ++ src/output/tests/writer-nompi.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index c6f5e5a09..6ab16305f 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -84,6 +84,8 @@ auto main(int argc, char* argv[]) -> int { adios.ExitComputationBlock(); } + adios.FlushAll(); + { // read adios2::IO io = adios.DeclareIO("read-test"); diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index ee93202b9..d22881741 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -93,6 +93,8 @@ auto main(int argc, char* argv[]) -> int { 
writer.endWriting(); } + adios.FlushAll(); + { // read adios2::IO io = adios.DeclareIO("read-test"); From c18162d9e04ed366895da4905653b45d0e3473f7 Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 4 Nov 2024 18:09:55 -0500 Subject: [PATCH 034/124] correct layout in tests (RUNTEST) --- src/output/tests/writer-nompi.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index d22881741..08200d804 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -100,6 +100,8 @@ auto main(int argc, char* argv[]) -> int { adios2::IO io = adios.DeclareIO("read-test"); io.SetEngine("hdf5"); adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read); + const auto layoutRight = io.InquireAttribute("LayoutRight").Data()[0] == + 1; raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, "NGhosts is not correct", @@ -138,6 +140,9 @@ auto main(int argc, char* argv[]) -> int { std::size_t nx1_r = dims[0]; std::size_t nx2_r = dims[1]; std::size_t nx3_r = dims[2]; + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } raise::ErrorIf((nx1_r != CEILDIV(nx1, dwn1)) || (nx2_r != CEILDIV(nx2, dwn2)) || (nx3_r != CEILDIV(nx3, dwn3)), @@ -151,8 +156,14 @@ auto main(int argc, char* argv[]) -> int { CEILDIV(nx3, dwn3)), HERE); + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } fieldVar.SetSelection( adios2::Box({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r })); + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } field_read = array_t(name, nx1_r, nx2_r, nx3_r); auto field_read_h = Kokkos::create_mirror_view(field_read); reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); From 96a295c62ed9ab8d1a5a5337b851cfc419869534 Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 29 Jul 2024 09:56:03 -0400 Subject: [PATCH 035/124] patch for mpich send buffr --- src/framework/domain/metadomain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 5e66bc366..ca52a53cf 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -381,7 +381,7 @@ namespace ntt { #if defined(MPI_ENABLED) auto dx_mins = std::vector(g_ndomains); dx_mins[g_mpi_rank] = dx_min; - MPI_Allgather(&dx_mins[g_mpi_rank], + MPI_Allgather(&dx_min, 1, mpi::get_type(), dx_mins.data(), From 2344629a5210d54294cea1e35dfc0e1eae682b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 Aug 2024 14:54:47 -0500 Subject: [PATCH 036/124] initial commit: modification to shock pgen to run magnetized shocks --- setups/srpic/shock/pgen.hpp | 57 ++++++++++++++++++++++++++++++++--- setups/srpic/shock/shock.toml | 4 +++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index f07b99878..4a9cc3f09 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -14,6 +14,47 @@ namespace user { using namespace ntt; + template + struct InitFields + { + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t bbeta) : + Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Bbeta { bbeta } {} + + // alternative: initialize magnetisation from simulation parameters as in Tristan? 
+ // Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); + + // magnetic field components + Inline auto bx1(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::cos(Btheta / 180.0 * Kokkos::numbers::pi); + } + Inline auto bx2(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + } + Inline auto bx3(const coord_t &x_Ph) const -> real_t + { + return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + } + + // electric field components + Inline auto ex1(const coord_t &x_Ph) const -> real_t + { + return ZERO; + } + Inline auto ex2(const coord_t &x_Ph) const -> real_t + { + return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + } + Inline auto ex3(const coord_t &x_Ph) const -> real_t + { + return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + } + + private: + const real_t Btheta, Bphi, Bbeta, Bmag; + }; + template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -30,10 +71,18 @@ namespace user { const real_t drift_ux, temperature; - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , drift_ux { p.template get("setup.drift_ux") } - , temperature { p.template get("setup.temperature") } {} + const real_t Btheta, Bphi, Bbeta, Bmag; + InitFields init_flds; + + inline PGen(const SimulationParams &p, const Metadomain &m) + : arch::ProblemGenerator { p } + , drift_ux { p.template get("setup.drift_ux") } + , temperature { p.template get("setup.temperature") } + , Bmag { p.template get("setup.Bmag", 0.0) } + , Btheta { p.template get("setup.Btheta", 0.0) } + , Bphi { p.template get("setup.Bphi", 0.0) } + , Bbeta { p.template get("setup.Bbeta", 0.0) } + , init_flds { Bmag, Btheta, Bphi, Bbeta } 
{} inline PGen() {} diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..90571631e 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -42,6 +42,10 @@ [setup] drift_ux = 0.1 temperature = 1e-3 + Bmag = 0.0 + Btheta = 0.0 + Bphi = 0.0 + Bbeta = 0.0 [output] interval_time = 0.1 From a5f3485eb9e7f0cddeb45c3e6ebd032c72f1a7fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 5 Sep 2024 15:34:22 -0500 Subject: [PATCH 037/124] fix misunderstanding in setup --- setups/srpic/shock/pgen.hpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 4a9cc3f09..c3771cde2 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -17,8 +17,8 @@ namespace user { template struct InitFields { - InitFields(real_t bmag, real_t btheta, real_t bphi, real_t bbeta) : - Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Bbeta { bbeta } {} + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : + Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} // alternative: initialize magnetisation from simulation parameters as in Tristan? 
// Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); @@ -44,15 +44,15 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { - return -Bbeta * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } private: - const real_t Btheta, Bphi, Bbeta, Bmag; + const real_t Btheta, Bphi, Vx, Bmag; }; template @@ -71,7 +71,7 @@ namespace user { const real_t drift_ux, temperature; - const real_t Btheta, Bphi, Bbeta, Bmag; + const real_t Btheta, Bphi, Bmag; InitFields init_flds; inline PGen(const SimulationParams &p, const Metadomain &m) @@ -81,8 +81,7 @@ namespace user { , Bmag { p.template get("setup.Bmag", 0.0) } , Btheta { p.template get("setup.Btheta", 0.0) } , Bphi { p.template get("setup.Bphi", 0.0) } - , Bbeta { p.template get("setup.Bbeta", 0.0) } - , init_flds { Bmag, Btheta, Bphi, Bbeta } {} + , init_flds { Bmag, Btheta, Bphi, drift_ux } {} inline PGen() {} From 6eb034f3e1637eb4f5e0d1c82747045c7a4ad8ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 11 Sep 2024 14:07:43 -0500 Subject: [PATCH 038/124] fix sign error --- setups/srpic/shock/pgen.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index c3771cde2..1194e7fed 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -20,9 +20,6 @@ namespace user { InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} - // alternative: 
initialize magnetisation from simulation parameters as in Tristan? - // Bmag = math::sqrt(ppc0 * 0.5 * c * c * me * sigma); - // magnetic field components Inline auto bx1(const coord_t &x_Ph) const -> real_t { @@ -44,7 +41,7 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { From 9d1b39b03a909fe3f7c234e85e1c43b7d6226ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 11 Sep 2024 14:13:54 -0500 Subject: [PATCH 039/124] Added comment for `InitFields` --- setups/srpic/shock/pgen.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 1194e7fed..715c222df 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -16,7 +16,16 @@ namespace user { template struct InitFields - { + { + /* + Sets up magnetic and electric field components for the simulation. + Must satisfy E = -v x B for Lorentz Force to be zero. 
+ + @param bmag: magnetic field scaling + @param btheta: magnetic field polar angle + @param bphi: magnetic field azimuthal angle + @param drift_ux: drift velocity in the x direction + */ InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} From 037c705a2c11649713e7b81e33778e37fdddc85c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 23 Sep 2024 11:25:57 -0500 Subject: [PATCH 040/124] fix signs (again) --- setups/srpic/shock/pgen.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 715c222df..1fdd18faa 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -50,11 +50,11 @@ namespace user { } Inline auto ex2(const coord_t &x_Ph) const -> real_t { - return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); + return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); } Inline auto ex3(const coord_t &x_Ph) const -> real_t { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); + return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } private: From a613d0aa43bf932bf43e756d1a96c080ab9676b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 23 Sep 2024 11:37:27 -0500 Subject: [PATCH 041/124] removed redundant parameter and added comments --- setups/srpic/shock/shock.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index 90571631e..e475ae097 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -42,10 +42,9 @@ [setup] drift_ux = 0.1 temperature = 1e-3 - Bmag = 0.0 - Btheta = 0.0 - 
Bphi = 0.0 - Bbeta = 0.0 + Bmag = 0.0 # set to 1.0 if magnetized shock is required + Btheta = 0.0 # magnetic field polar angle + Bphi = 0.0 # magnetic field azimuthal angle [output] interval_time = 0.1 From 31a7d5b1105fdec1ebc48f4623b3a85fbf5513e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 2 Oct 2024 10:10:44 -0500 Subject: [PATCH 042/124] added atmosphere bc to enforce initial magnetic field config at the boundaries --- setups/srpic/shock/pgen.hpp | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 1fdd18faa..999a7b608 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -57,9 +57,25 @@ namespace user { return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); } - private: - const real_t Btheta, Bphi, Vx, Bmag; - }; + private: + const real_t Btheta, Bphi, Vx, Bmag; + }; + + template + struct DriveFields : public InitFields { + DriveFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : + InitFields {bmag, btheta, bphi, drift_ux} {} + + /* Enforce resetting magnetic and electric field at the boundary + This avoids weird */ + using InitFields::bx1; + using InitFields::bx2; + using InitFields::bx3; + + using InitFields::ex1; + using InitFields::ex2; + using InitFields::ex3; + }; template struct PGen : public arch::ProblemGenerator { @@ -91,6 +107,14 @@ namespace user { inline PGen() {} + auto FieldDriver(real_t time) const -> DriveFields { + const real_t bmag = Bmag; + const real_t btheta = Btheta; + const real_t bphi = Bphi; + const real_t ux = drift_ux; + return DriveFields{bmag, btheta, bphi, ux}; + } + inline void InitPrtls(Domain& local_domain) { const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, From 78412c85f7b20b592a6919679d747f5711aaf762 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 4 
Nov 2024 21:10:04 -0500 Subject: [PATCH 043/124] changed FieldBC to FIXED --- .gitignore | 1 + input.example.toml | 4 +- setups/srpic/magnetar/magnetar.toml | 2 +- setups/srpic/magnetosphere/magnetosphere.toml | 2 +- setups/srpic/monopole/monopole.toml | 2 +- setups/srpic/shock/pgen.hpp | 143 +++++++++--------- setups/srpic/shock/shock.py | 4 +- setups/srpic/shock/shock.toml | 8 +- src/engines/srpic.hpp | 40 +++-- src/global/enums.h | 32 ++-- src/global/utils/numeric.h | 4 + src/kernels/fields_bcs.hpp | 12 +- 12 files changed, 127 insertions(+), 127 deletions(-) diff --git a/.gitignore b/.gitignore index a1b05e751..9a167b9d5 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ venv/ # CMake testing files Testing/ +tags .clangd .schema.json *_old/ diff --git a/input.example.toml b/input.example.toml index 5ee34d65d..e541f6e2b 100644 --- a/input.example.toml +++ b/input.example.toml @@ -90,10 +90,10 @@ # Boundary conditions for fields: # @required # @type: 1/2/3-size array of string tuples, each of size 1 or 2 - # @valid: "PERIODIC", "ABSORB", "ATMOSPHERE", "CUSTOM", "HORIZON" + # @valid: "PERIODIC", "ABSORB", "FIXED", "CUSTOM", "HORIZON" # @example: [["CUSTOM", "ABSORB"]] (for 2D spherical [[rmin, rmax]]) # @note: When periodic in any of the directions, you should only set one value [..., ["PERIODIC"], ...] 
- # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] + # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["FIXED", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] fields = "" # Boundary conditions for fields: diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 2a2260af5..cd9ff5695 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["FIXED", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 34e04b02d..83ade6e48 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["FIXED", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/monopole/monopole.toml b/setups/srpic/monopole/monopole.toml index 169837489..322c15dd4 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/setups/srpic/monopole/monopole.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["FIXED", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 999a7b608..30929383f 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -5,6 +5,7 @@ #include "global.h" #include "arch/traits.h" +#include "utils/numeric.h" #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" @@ -15,67 +16,66 @@ 
namespace user { using namespace ntt; template - struct InitFields - { - /* - Sets up magnetic and electric field components for the simulation. - Must satisfy E = -v x B for Lorentz Force to be zero. - - @param bmag: magnetic field scaling - @param btheta: magnetic field polar angle - @param bphi: magnetic field azimuthal angle - @param drift_ux: drift velocity in the x direction - */ - InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : - Bmag { bmag }, Btheta { btheta }, Bphi { bphi }, Vx { drift_ux } {} - - // magnetic field components - Inline auto bx1(const coord_t &x_Ph) const -> real_t - { - return Bmag * math::cos(Btheta / 180.0 * Kokkos::numbers::pi); - } - Inline auto bx2(const coord_t &x_Ph) const -> real_t - { - return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); - } - Inline auto bx3(const coord_t &x_Ph) const -> real_t - { - return Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); - } - - // electric field components - Inline auto ex1(const coord_t &x_Ph) const -> real_t - { - return ZERO; - } - Inline auto ex2(const coord_t &x_Ph) const -> real_t - { - return -Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::cos(Bphi / 180.0 * Kokkos::numbers::pi); - } - Inline auto ex3(const coord_t &x_Ph) const -> real_t - { - return Vx * Bmag * math::sin(Btheta / 180.0 * Kokkos::numbers::pi) * math::sin(Bphi / 180.0 * Kokkos::numbers::pi); - } - - private: - const real_t Btheta, Bphi, Vx, Bmag; - }; + struct InitFields { + /* + Sets up magnetic and electric field components for the simulation. + Must satisfy E = -v x B for Lorentz Force to be zero. 
+ + @param bmag: magnetic field scaling + @param btheta: magnetic field polar angle + @param bphi: magnetic field azimuthal angle + @param drift_ux: drift velocity in the x direction + */ + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) + : Bmag { bmag * static_cast(convert::deg2rad) } + , Btheta { btheta * static_cast(convert::deg2rad) } + , Bphi { bphi * static_cast(convert::deg2rad) } + , Vx { drift_ux } {} + + // magnetic field components + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return Bmag * math::cos(Btheta); + } + + Inline auto bx2(const coord_t& x_Ph) const -> real_t { + return Bmag * math::sin(Btheta) * math::sin(Bphi); + } + + Inline auto bx3(const coord_t& x_Ph) const -> real_t { + return Bmag * math::sin(Btheta) * math::cos(Bphi); + } + + // electric field components + Inline auto ex1(const coord_t& x_Ph) const -> real_t { + return ZERO; + } + + Inline auto ex2(const coord_t& x_Ph) const -> real_t { + return -Vx * Bmag * math::sin(Btheta) * math::cos(Bphi); + } + Inline auto ex3(const coord_t& x_Ph) const -> real_t { + return Vx * Bmag * math::sin(Btheta) * math::sin(Bphi); + } + + private: + const real_t Btheta, Bphi, Vx, Bmag; + }; + + /* Enforce resetting magnetic and electric field at the boundary */ template struct DriveFields : public InitFields { - DriveFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) : - InitFields {bmag, btheta, bphi, drift_ux} {} - - /* Enforce resetting magnetic and electric field at the boundary - This avoids weird */ - using InitFields::bx1; - using InitFields::bx2; - using InitFields::bx3; - - using InitFields::ex1; - using InitFields::ex2; - using InitFields::ex3; - }; + DriveFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) + : InitFields { bmag, btheta, bphi, drift_ux } {} + + using InitFields::bx1; + using InitFields::bx2; + using InitFields::bx3; + + using InitFields::ex1; + using InitFields::ex2; + using InitFields::ex3; + }; template struct PGen 
: public arch::ProblemGenerator { @@ -93,26 +93,22 @@ namespace user { const real_t drift_ux, temperature; - const real_t Btheta, Bphi, Bmag; + const real_t Btheta, Bphi, Bmag; InitFields init_flds; - inline PGen(const SimulationParams &p, const Metadomain &m) - : arch::ProblemGenerator { p } - , drift_ux { p.template get("setup.drift_ux") } - , temperature { p.template get("setup.temperature") } - , Bmag { p.template get("setup.Bmag", 0.0) } - , Btheta { p.template get("setup.Btheta", 0.0) } - , Bphi { p.template get("setup.Bphi", 0.0) } - , init_flds { Bmag, Btheta, Bphi, drift_ux } {} + inline PGen(const SimulationParams& p, const Metadomain& m) + : arch::ProblemGenerator { p } + , drift_ux { p.template get("setup.drift_ux") } + , temperature { p.template get("setup.temperature") } + , Bmag { p.template get("setup.Bmag", ZERO) } + , Btheta { p.template get("setup.Btheta", ZERO) } + , Bphi { p.template get("setup.Bphi", ZERO) } + , init_flds { Bmag, Btheta, Bphi, drift_ux } {} inline PGen() {} auto FieldDriver(real_t time) const -> DriveFields { - const real_t bmag = Bmag; - const real_t btheta = Btheta; - const real_t bphi = Bphi; - const real_t ux = drift_ux; - return DriveFields{bmag, btheta, bphi, ux}; + return DriveFields { Bmag, Btheta, Bphi, drift_ux }; } inline void InitPrtls(Domain& local_domain) { @@ -121,7 +117,8 @@ namespace user { temperature, -drift_ux, in::x1); - const auto injector = arch::UniformInjector( + + const auto injector = arch::UniformInjector( energy_dist, { 1, 2 }); arch::InjectUniform>( diff --git a/setups/srpic/shock/shock.py b/setups/srpic/shock/shock.py index 64224c728..dc1565572 100644 --- a/setups/srpic/shock/shock.py +++ b/setups/srpic/shock/shock.py @@ -2,7 +2,7 @@ import matplotlib.pyplot as plt import matplotlib as mpl -data = nt2r.Data("shock-03.h5") +data = nt2r.Data("shock.h5") def frame(ti, f): @@ -55,7 +55,7 @@ def frame(ti, f): axs = [fig.add_subplot(gs[i]) for i in range(len(quantities))] for ax, q in zip(axs, 
quantities): - q["compute"](f).coarsen(x=2, y=2).mean().plot( + q["compute"](f.isel(t=ti)).plot( ax=ax, cmap=q["cmap"], norm=q["norm"], diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index e475ae097..f8f5e81a7 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -11,7 +11,7 @@ metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] + fields = [["FIXED", "ABSORB"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] [scales] @@ -42,9 +42,9 @@ [setup] drift_ux = 0.1 temperature = 1e-3 - Bmag = 0.0 # set to 1.0 if magnetized shock is required - Btheta = 0.0 # magnetic field polar angle - Bphi = 0.0 # magnetic field azimuthal angle + Bmag = 1.0 + Btheta = 0.0 + Bphi = 0.0 [output] interval_time = 0.1 diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 78c8f371e..1747c5138 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -42,7 +42,6 @@ #include #include -#include #include namespace ntt { @@ -586,8 +585,8 @@ namespace ntt { if (domain.mesh.flds_bc_in(direction) == FldsBC::AXIS) { AxisFieldsIn(direction, domain, tags); } - } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::ATMOSPHERE) { - AtmosphereFieldsIn(direction, domain, tags); + } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::FIXED) { + FixedFieldsIn(direction, domain, tags); } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::CONDUCTOR) { if (domain.mesh.flds_bc_in(direction) == FldsBC::CONDUCTOR) { ConductorFieldsIn(direction, domain, tags); @@ -713,11 +712,11 @@ namespace ntt { } } - void AtmosphereFieldsIn(dir::direction_t direction, - domain_t& domain, - BCTags tags) { + void FixedFieldsIn(dir::direction_t direction, + domain_t& domain, + BCTags tags) { /** - * atmosphere boundaries + * fixed field boundaries */ if constexpr (traits::has_member::value) { const auto [sign, dim, xg_min, xg_max] = get_atm_extent(direction); @@ -759,9 +758,9 @@ 
namespace ntt { if (dim == in::x1) { if (sign > 0) { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -769,9 +768,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -782,9 +781,9 @@ namespace ntt { if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { if (sign > 0) { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -792,9 +791,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -808,9 +807,9 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { if (sign > 0) { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -818,9 +817,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "AtmosphereBCFields", + "FixedBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::FixedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -834,8 +833,7 @@ namespace ntt { raise::Error("Invalid dimension", HERE); } } else { - raise::Error("Field driver not implemented in PGEN for atmosphere BCs", - HERE); + raise::Error("Field driver not implemented in PGEN for fixed BCs", HERE); } } diff --git a/src/global/enums.h b/src/global/enums.h index 57822dec4..1946da4b8 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -8,7 +8,7 @@ * - 
enum ntt::SimEngine // SRPIC, GRPIC * - enum ntt::PrtlBC // periodic, absorb, atmosphere, custom, * reflect, horizon, axis, sync - * - enum ntt::FldsBC // periodic, absorb, atmosphere, custom, + * - enum ntt::FldsBC // periodic, absorb, fixed, custom, * conductor, horizon, axis, sync * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none @@ -213,25 +213,25 @@ namespace ntt { static constexpr const char* label = "flds_bc"; enum type : uint8_t { - INVALID = 0, - PERIODIC = 1, - ABSORB = 2, - ATMOSPHERE = 3, - CUSTOM = 4, - CONDUCTOR = 5, - HORIZON = 6, - AXIS = 7, - SYNC = 8, // <- SYNC means synchronization with other domains + INVALID = 0, + PERIODIC = 1, + ABSORB = 2, + FIXED = 3, + CUSTOM = 4, + CONDUCTOR = 5, + HORIZON = 6, + AXIS = 7, + SYNC = 8, // <- SYNC means synchronization with other domains }; constexpr FldsBC(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { PERIODIC, ABSORB, ATMOSPHERE, CUSTOM, - CONDUCTOR, HORIZON, AXIS, SYNC }; - static constexpr const char* lookup[] = { "periodic", "absorb", - "atmosphere", "custom", - "conductor", "horizon", - "axis", "sync" }; + static constexpr type variants[] = { PERIODIC, ABSORB, FIXED, CUSTOM, + CONDUCTOR, HORIZON, AXIS, SYNC }; + static constexpr const char* lookup[] = { "periodic", "absorb", + "fixed", "custom", + "conductor", "horizon", + "axis", "sync" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 0b09f6c11..719256d1d 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -91,4 +91,8 @@ namespace constant { inline constexpr double SQRT3 = 1.73205080756887729352; } // namespace constant +namespace convert { + inline constexpr double deg2rad = constant::PI / 180.0; +} // namespace convert + #endif // GLOBAL_UTILS_NUMERIC_H diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 
e617010b4..1a9ffcff4 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -217,7 +217,7 @@ namespace kernel { }; template - struct AtmosphereBoundaries_kernel { + struct FixedBoundaries_kernel { static constexpr Dimension D = M::Dim; static constexpr bool defines_ex1 = traits::has_method::value; static constexpr bool defines_ex2 = traits::has_method::value; @@ -240,11 +240,11 @@ namespace kernel { const std::size_t i_edge; const bool setE, setB; - AtmosphereBoundaries_kernel(ndfield_t& Fld, - const I& finit, - const M& metric, - std::size_t i_edge, - BCTags tags) + FixedBoundaries_kernel(ndfield_t& Fld, + const I& finit, + const M& metric, + std::size_t i_edge, + BCTags tags) : Fld { Fld } , finit { finit } , metric { metric } From 98349950f7392364248c353906e6440369c322ca Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 5 Nov 2024 13:43:39 -0500 Subject: [PATCH 044/124] FIXED bc + ATM --- CMakeLists.txt | 6 +- input.example.toml | 6 +- setups/srpic/magnetar/magnetar.toml | 2 +- setups/srpic/magnetosphere/magnetosphere.toml | 2 +- setups/srpic/monopole/monopole.toml | 2 +- setups/srpic/shock/pgen.hpp | 42 ++-- src/engines/srpic.hpp | 206 ++++++++++-------- src/global/arch/traits.h | 3 + src/global/enums.h | 33 ++- src/global/tests/enums.cpp | 4 +- src/kernels/fields_bcs.hpp | 12 +- 11 files changed, 166 insertions(+), 152 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ee00d1b4..1a977c990 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,9 @@ if(${DEBUG} STREQUAL "OFF") set(CMAKE_BUILD_TYPE Release CACHE STRING "CMake build type") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -DNDEBUG -Wno-unused-local-typedefs -Wno-unknown-cuda-version" + ) else() set(CMAKE_BUILD_TYPE Debug @@ -64,8 +66,6 @@ else() "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs") - # options set(precisions 
"single" "double" diff --git a/input.example.toml b/input.example.toml index e541f6e2b..4bf005b0e 100644 --- a/input.example.toml +++ b/input.example.toml @@ -90,10 +90,10 @@ # Boundary conditions for fields: # @required # @type: 1/2/3-size array of string tuples, each of size 1 or 2 - # @valid: "PERIODIC", "ABSORB", "FIXED", "CUSTOM", "HORIZON" + # @valid: "PERIODIC", "ABSORB", "FIXED", "ATMOSPHERE", "CUSTOM", "HORIZON" # @example: [["CUSTOM", "ABSORB"]] (for 2D spherical [[rmin, rmax]]) - # @note: When periodic in any of the directions, you should only set one value [..., ["PERIODIC"], ...] - # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["FIXED", "ABSORB"]] + # @note: When periodic in any of the directions, you should only set one value: [..., ["PERIODIC"], ...] + # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] fields = "" # Boundary conditions for fields: diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index cd9ff5695..2a2260af5 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["FIXED", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 83ade6e48..34e04b02d 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["FIXED", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/monopole/monopole.toml 
b/setups/srpic/monopole/monopole.toml index 322c15dd4..169837489 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/setups/srpic/monopole/monopole.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["FIXED", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 30929383f..1eedb3a01 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -5,6 +5,7 @@ #include "global.h" #include "arch/traits.h" +#include "utils/error.h" #include "utils/numeric.h" #include "archetypes/energy_dist.h" @@ -33,28 +34,28 @@ namespace user { , Vx { drift_ux } {} // magnetic field components - Inline auto bx1(const coord_t& x_Ph) const -> real_t { + Inline auto bx1(const coord_t&) const -> real_t { return Bmag * math::cos(Btheta); } - Inline auto bx2(const coord_t& x_Ph) const -> real_t { + Inline auto bx2(const coord_t&) const -> real_t { return Bmag * math::sin(Btheta) * math::sin(Bphi); } - Inline auto bx3(const coord_t& x_Ph) const -> real_t { + Inline auto bx3(const coord_t&) const -> real_t { return Bmag * math::sin(Btheta) * math::cos(Bphi); } // electric field components - Inline auto ex1(const coord_t& x_Ph) const -> real_t { + Inline auto ex1(const coord_t&) const -> real_t { return ZERO; } - Inline auto ex2(const coord_t& x_Ph) const -> real_t { + Inline auto ex2(const coord_t&) const -> real_t { return -Vx * Bmag * math::sin(Btheta) * math::cos(Bphi); } - Inline auto ex3(const coord_t& x_Ph) const -> real_t { + Inline auto ex3(const coord_t&) const -> real_t { return Vx * Bmag * math::sin(Btheta) * math::sin(Bphi); } @@ -62,21 +63,6 @@ namespace user { const real_t Btheta, Bphi, Vx, Bmag; }; - /* Enforce resetting magnetic and electric field at the boundary */ - template - struct DriveFields : public InitFields { - DriveFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) - : 
InitFields { bmag, btheta, bphi, drift_ux } {} - - using InitFields::bx1; - using InitFields::bx2; - using InitFields::bx3; - - using InitFields::ex1; - using InitFields::ex2; - using InitFields::ex3; - }; - template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -107,8 +93,18 @@ namespace user { inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { - return DriveFields { Bmag, Btheta, Bphi, drift_ux }; + auto FixField(const em& comp) const -> real_t { + if (comp == em::ex2) { + return init_flds.ex2({ ZERO }); + } else if (comp == em::ex3) { + return init_flds.ex3({ ZERO }); + } else if (comp == em::bx1) { + return init_flds.bx1({ ZERO }); + } else { + raise::Error("Other components should not be requested when BC is in X", + HERE); + return ZERO; + } } inline void InitPrtls(Domain& local_domain) { diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 1747c5138..244a5f863 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -585,11 +585,11 @@ namespace ntt { if (domain.mesh.flds_bc_in(direction) == FldsBC::AXIS) { AxisFieldsIn(direction, domain, tags); } + } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::ATMOSPHERE) { + AtmosphereFieldsIn(direction, domain, tags); } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::FIXED) { - FixedFieldsIn(direction, domain, tags); - } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::CONDUCTOR) { - if (domain.mesh.flds_bc_in(direction) == FldsBC::CONDUCTOR) { - ConductorFieldsIn(direction, domain, tags); + if (domain.mesh.flds_bc_in(direction) == FldsBC::FIXED) { + FixedFieldsIn(direction, domain, tags); } } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::CUSTOM) { if (domain.mesh.flds_bc_in(direction) == FldsBC::CUSTOM) { @@ -718,6 +718,100 @@ namespace ntt { /** * fixed field boundaries */ + const auto sign = direction.get_sign(); + const auto dim = direction.get_dim(); + 
raise::ErrorIf(dim != in::x1 and M::CoordType != Coord::Cart, + "Fixed BCs only implemented for x1 in " + "non-cartesian coordinates", + HERE); + em normal_b_comp, tang_e_comp1, tang_e_comp2; + if (dim == in::x1) { + normal_b_comp = em::bx1; + tang_e_comp1 = em::ex2; + tang_e_comp2 = em::ex3; + } else if (dim == in::x2) { + normal_b_comp = em::bx2; + tang_e_comp1 = em::ex1; + tang_e_comp2 = em::ex3; + } else if (dim == in::x3) { + normal_b_comp = em::bx3; + tang_e_comp1 = em::ex1; + tang_e_comp2 = em::ex2; + } else { + raise::Error("Invalid dimension", HERE); + } + std::vector xi_min, xi_max; + const std::vector all_dirs { in::x1, in::x2, in::x3 }; + for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { + const auto dd = all_dirs[d]; + if (dim == dd) { + if (sign > 0) { // + direction + xi_min.push_back(domain.mesh.n_all(dd) - N_GHOSTS); + xi_max.push_back(domain.mesh.n_all(dd)); + } else { // - direction + xi_min.push_back(0); + xi_max.push_back(N_GHOSTS); + } + } else { + xi_min.push_back(0); + xi_max.push_back(domain.mesh.n_all(dd)); + } + } + raise::ErrorIf(xi_min.size() != xi_max.size() or + xi_min.size() != static_cast(M::Dim), + "Invalid range size", + HERE); + std::vector comps; + if (tags & BC::E) { + comps.push_back(tang_e_comp1); + comps.push_back(tang_e_comp2); + } + if (tags & BC::B) { + comps.push_back(normal_b_comp); + } + if constexpr (traits::has_member::value) { + raise::Error("Field driver for fixed fields not implemented", HERE); + } else { + // if field driver not present, set fields to fixed values + for (const auto& comp : comps) { + auto value = ZERO; + if constexpr ( + traits::has_member::value) { + // if fix field function present, read from it + value = m_pgen.FixField((em)comp); + } + if constexpr (M::Dim == Dim::_1D) { + Kokkos::deep_copy(Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + comp), + value); + } else if constexpr (M::Dim == Dim::_2D) { + Kokkos::deep_copy(Kokkos::subview(domain.fields.em, + 
std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + comp), + value); + } else if constexpr (M::Dim == Dim::_3D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + std::make_pair(xi_min[2], xi_max[2]), + comp), + value); + } else { + raise::Error("Invalid dimension", HERE); + } + } + } + } + + void AtmosphereFieldsIn(dir::direction_t direction, + domain_t& domain, + BCTags tags) { + /** + * atmosphere field boundaries + */ if constexpr (traits::has_member::value) { const auto [sign, dim, xg_min, xg_max] = get_atm_extent(direction); const auto dd = static_cast(dim); @@ -758,9 +852,9 @@ namespace ntt { if (dim == in::x1) { if (sign > 0) { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -768,9 +862,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -781,9 +875,9 @@ namespace ntt { if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { if (sign > 0) { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -791,9 +885,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -807,9 +901,9 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { if (sign > 0) { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( 
domain.fields.em, field_driver, domain.mesh.metric, @@ -817,9 +911,9 @@ namespace ntt { tags)); } else { Kokkos::parallel_for( - "FixedBCFields", + "AtmosphereBCFields", range, - kernel::FixedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, field_driver, domain.mesh.metric, @@ -833,86 +927,8 @@ namespace ntt { raise::Error("Invalid dimension", HERE); } } else { - raise::Error("Field driver not implemented in PGEN for fixed BCs", HERE); - } - } - - void ConductorFieldsIn(dir::direction_t direction, - domain_t& domain, - BCTags tags) { - const auto sign = direction.get_sign(); - const auto dim = direction.get_dim(); - raise::ErrorIf( - dim != in::x1 and M::CoordType != Coord::Cart, - "Conductor BCs only implemented for x1 in non-cartesian coordinates", - HERE); - em normal_b_comp, tang_e_comp1, tang_e_comp2; - if (dim == in::x1) { - normal_b_comp = em::bx1; - tang_e_comp1 = em::ex2; - tang_e_comp2 = em::ex3; - } else if (dim == in::x2) { - normal_b_comp = em::bx2; - tang_e_comp1 = em::ex1; - tang_e_comp2 = em::ex3; - } else if (dim == in::x3) { - normal_b_comp = em::bx3; - tang_e_comp1 = em::ex1; - tang_e_comp2 = em::ex2; - } else { - raise::Error("Invalid dimension", HERE); - } - std::vector xi_min, xi_max; - const std::vector all_dirs { in::x1, in::x2, in::x3 }; - for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { - const auto dd = all_dirs[d]; - if (dim == dd) { - if (sign > 0) { // + direction - xi_min.push_back(domain.mesh.n_all(dd) - N_GHOSTS); - xi_max.push_back(domain.mesh.n_all(dd)); - } else { // - direction - xi_min.push_back(0); - xi_max.push_back(N_GHOSTS); - } - } else { - xi_min.push_back(0); - xi_max.push_back(domain.mesh.n_all(dd)); - } - } - raise::ErrorIf(xi_min.size() != xi_max.size() or - xi_min.size() != static_cast(M::Dim), - "Invalid range size", + raise::Error("Field driver not implemented in PGEN for atmosphere BCs", HERE); - std::vector comps; - if (tags & BC::E) { - comps.push_back(tang_e_comp1); - 
comps.push_back(tang_e_comp2); - } - if (tags & BC::B) { - comps.push_back(normal_b_comp); - } - for (const auto& comp : comps) { - if constexpr (M::Dim == Dim::_1D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - comp), - ZERO); - } else if constexpr (M::Dim == Dim::_2D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - comp), - ZERO); - } else if constexpr (M::Dim == Dim::_3D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - std::make_pair(xi_min[2], xi_max[2]), - comp), - ZERO); - } else { - raise::Error("Invalid dimension", HERE); - } } } diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index e915bdf1a..9fd40e201 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -96,6 +96,9 @@ namespace traits { template using field_driver_t = decltype(&T::FieldDriver); + template + using fix_field_t = decltype(&T::FixField); + template using custom_fields_t = decltype(&T::CustomFields); diff --git a/src/global/enums.h b/src/global/enums.h index 1946da4b8..283cb456d 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -8,8 +8,8 @@ * - enum ntt::SimEngine // SRPIC, GRPIC * - enum ntt::PrtlBC // periodic, absorb, atmosphere, custom, * reflect, horizon, axis, sync - * - enum ntt::FldsBC // periodic, absorb, fixed, custom, - * conductor, horizon, axis, sync + * - enum ntt::FldsBC // periodic, absorb, fixed, atmosphere, + * custom, horizon, axis, sync * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, @@ -213,25 +213,24 @@ namespace ntt { static constexpr const char* label = "flds_bc"; enum type : uint8_t { - INVALID = 0, - PERIODIC = 1, - ABSORB = 2, - FIXED = 3, - CUSTOM = 4, - CONDUCTOR = 5, - HORIZON = 6, - AXIS = 7, 
- SYNC = 8, // <- SYNC means synchronization with other domains + INVALID = 0, + PERIODIC = 1, + ABSORB = 2, + FIXED = 3, + ATMOSPHERE = 4, + CUSTOM = 5, + HORIZON = 6, + AXIS = 7, + SYNC = 8, // <- SYNC means synchronization with other domains }; constexpr FldsBC(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { PERIODIC, ABSORB, FIXED, CUSTOM, - CONDUCTOR, HORIZON, AXIS, SYNC }; - static constexpr const char* lookup[] = { "periodic", "absorb", - "fixed", "custom", - "conductor", "horizon", - "axis", "sync" }; + static constexpr type variants[] = { PERIODIC, ABSORB, FIXED, ATMOSPHERE, + CUSTOM, HORIZON, AXIS, SYNC }; + static constexpr const char* lookup[] = { "periodic", "absorb", "fixed", + "atmosphere", "custom", "horizon", + "axis", "sync" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 1fc57398f..4d678e85e 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -61,8 +61,8 @@ auto main() -> int { enum_str_t all_simulation_engines = { "srpic", "grpic" }; enum_str_t all_particle_bcs = { "periodic", "absorb", "atmosphere", "custom", "reflect", "horizon", "axis", "sync" }; - enum_str_t all_fields_bcs = { "periodic", "absorb", "atmosphere", "custom", - "horizon", "conductor", "axis", "sync" }; + enum_str_t all_fields_bcs = { "periodic", "absorb", "fixed", "atmosphere", + "custom", "horizon", "axis", "sync" }; enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 1a9ffcff4..2f2a458bb 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -217,7 +217,7 @@ namespace kernel { }; template - struct FixedBoundaries_kernel { + struct EnforcedBoundaries_kernel { static constexpr Dimension D = M::Dim; static constexpr bool defines_ex1 = 
traits::has_method::value; static constexpr bool defines_ex2 = traits::has_method::value; @@ -240,11 +240,11 @@ namespace kernel { const std::size_t i_edge; const bool setE, setB; - FixedBoundaries_kernel(ndfield_t& Fld, - const I& finit, - const M& metric, - std::size_t i_edge, - BCTags tags) + EnforcedBoundaries_kernel(ndfield_t& Fld, + const I& finit, + const M& metric, + std::size_t i_edge, + BCTags tags) : Fld { Fld } , finit { finit } , metric { metric } From 833f038e88e2b0b00bccdea2425127dc777d4e1a Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 6 Nov 2024 17:02:16 -0500 Subject: [PATCH 045/124] multifile output --- input.example.toml | 4 ++ src/framework/domain/output.cpp | 11 +++-- src/framework/parameters.cpp | 10 ++++- src/global/global.h | 11 +++++ src/output/CMakeLists.txt | 1 + src/output/writer.cpp | 73 ++++++++++++++++++++++++++------- src/output/writer.h | 10 +++-- 7 files changed, 98 insertions(+), 22 deletions(-) diff --git a/input.example.toml b/input.example.toml index 4bf005b0e..ce5a2079d 100644 --- a/input.example.toml +++ b/input.example.toml @@ -320,6 +320,10 @@ # @default: -1.0 (disabled) # @note: When `interval_time` < 0, the output is controlled by `interval`, otherwise by `interval_time` interval_time = "" + # Whether to output each timestep into separate files: + # @type: bool + # @default: true + separate_files = "" [output.fields] # Toggle for the field output: diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index c7cb6bb65..d88e593b5 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -65,7 +65,8 @@ namespace ntt { g_writer.init(ptr_adios, params.template get("output.format"), - params.template get("simulation.name")); + params.template get("simulation.name"), + params.template get("output.separate_files")); g_writer.defineMeshLayout(glob_shape_with_ghosts, off_ncells_with_ghosts, loc_shape_with_ghosts, @@ -215,8 +216,8 @@ namespace ntt { "local_domain is a 
placeholder", HERE); logger::Checkpoint("Writing output", HERE); - g_writer.beginWriting(current_step, current_time); if (write_fields) { + g_writer.beginWriting(WriteMode::Fields, current_step, current_time); const auto incl_ghosts = params.template get("output.debug.ghosts"); const auto dwn = params.template get>( "output.fields.downsampling"); @@ -467,9 +468,11 @@ namespace ntt { } g_writer.writeField(names, local_domain->fields.bckp, addresses); } + g_writer.endWriting(WriteMode::Fields); } // end shouldWrite("fields", step, time) if (write_particles) { + g_writer.beginWriting(WriteMode::Particles, current_step, current_time); const auto prtl_stride = params.template get( "output.particles.stride"); for (const auto& prtl : g_writer.speciesWriters()) { @@ -547,9 +550,11 @@ namespace ntt { g_writer.writeParticleQuantity(buff_x3, glob_tot, offset, prtl.name("X", 3)); } } + g_writer.endWriting(WriteMode::Particles); } // end shouldWrite("particles", step, time) if (write_spectra) { + g_writer.beginWriting(WriteMode::Spectra, current_step, current_time); const auto log_bins = params.template get( "output.spectra.log_bins"); const auto n_bins = params.template get( @@ -613,9 +618,9 @@ namespace ntt { g_writer.writeSpectrum(dn, spec.name()); } g_writer.writeSpectrumBins(energy, "sEbn"); + g_writer.endWriting(WriteMode::Spectra); } // end shouldWrite("spectra", step, time) - g_writer.endWriting(); return true; } diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index b667b5ac9..91a14ae09 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -463,6 +463,9 @@ namespace ntt { toml::find_or(toml_data, "output", "interval", defaults::output::interval)); set("output.interval_time", toml::find_or(toml_data, "output", "interval_time", -1.0)); + set("output.separate_files", + toml::find_or(toml_data, "output", "separate_files", true)); + promiseToDefine("output.fields.interval"); promiseToDefine("output.fields.interval_time"); 
promiseToDefine("output.fields.enable"); @@ -509,11 +512,16 @@ namespace ntt { set("output.fields.downsampling", field_dwn); // particles + auto all_specs = std::vector {}; + const auto nspec = get("particles.nspec"); + for (auto i = 0u; i < nspec; ++i) { + all_specs.push_back(static_cast(i + 1)); + } const auto prtl_out = toml::find_or(toml_data, "output", "particles", "species", - std::vector {}); + all_specs); set("output.particles.species", prtl_out); set("output.particles.stride", toml::find_or(toml_data, diff --git a/src/global/global.h b/src/global/global.h index ad524fb0e..d55d8e21e 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -249,6 +249,17 @@ namespace Comm { typedef int CommTags; +namespace WriteMode { + enum WriteModeTags_ { + None = 0, + Fields = 1 << 0, + Particles = 1 << 1, + Spectra = 1 << 2, + }; +} // namespace WriteMode + +typedef int WriteModeTags; + namespace BC { enum BCTags_ { None = 0, diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index e6dbcc03a..81333e9ff 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -30,6 +30,7 @@ add_library(ntt_output ${SOURCES}) set(libs ntt_global) add_dependencies(ntt_output ${libs}) target_link_libraries(ntt_output PUBLIC ${libs}) +target_link_libraries(ntt_output PRIVATE stdc++fs) target_include_directories( ntt_output diff --git a/src/output/writer.cpp b/src/output/writer.cpp index 4ba0ea14c..95965c864 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -18,6 +18,7 @@ #include #endif +#include #include #include @@ -25,9 +26,11 @@ namespace out { void Writer::init(adios2::ADIOS* ptr_adios, const std::string& engine, - const std::string& title) { - m_engine = engine; - p_adios = ptr_adios; + const std::string& title, + bool use_separate_files) { + m_separate_files = use_separate_files; + m_engine = engine; + p_adios = ptr_adios; raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); @@ -36,7 +39,7 @@ namespace out { 
m_io.DefineVariable("Step"); m_io.DefineVariable("Time"); - m_fname = title + (m_engine == "hdf5" ? ".h5" : ".bp"); + m_fname = title; } void Writer::addTracker(const std::string& type, @@ -412,33 +415,75 @@ namespace out { m_writer.Put(vare, xe_h); } - void Writer::beginWriting(std::size_t tstep, long double time) { + void Writer::beginWriting(WriteModeTags write_mode, + std::size_t tstep, + long double time) { + raise::ErrorIf(write_mode == WriteMode::None, "None is not a valid mode", HERE); raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - p_adios->ExitComputationBlock(); - if (m_writing_mode) { + if (m_active_mode != WriteMode::None) { raise::Fatal("Already writing", HERE); } - m_writing_mode = true; + m_active_mode = write_mode; try { - m_writer = m_io.Open(m_fname, m_mode); + std::string filename; + const std::string ext = m_engine == "hdf5" ? "h5" : "bp"; + if (m_separate_files) { + std::string mode_str; + if (m_active_mode == WriteMode::Fields) { + mode_str = "fields"; + } else if (m_active_mode == WriteMode::Particles) { + mode_str = "particles"; + } else if (m_active_mode == WriteMode::Spectra) { + mode_str = "spectra"; + } else { + raise::Fatal("Unknown write mode", HERE); + } + CallOnce( + [](auto& main_path, auto& mode_path) { + const std::filesystem::path main { main_path }; + const std::filesystem::path mode { mode_path }; + if (!std::filesystem::exists(main_path)) { + std::filesystem::create_directory(main_path); + } + if (!std::filesystem::exists(main / mode)) { + std::filesystem::create_directory(main / mode); + } + }, + m_fname, + mode_str); + filename = fmt::format("%s/%s/%s.%08lu.%s", + m_fname.c_str(), + mode_str.c_str(), + mode_str.c_str(), + tstep, + ext.c_str()); + m_mode = adios2::Mode::Write; + } else { + filename = fmt::format("%s.%s", m_fname.c_str(), ext.c_str()); + m_mode = std::filesystem::exists(filename) ? 
adios2::Mode::Append + : adios2::Mode::Write; + } + m_writer = m_io.Open(filename, m_mode); } catch (std::exception& e) { raise::Fatal(e.what(), HERE); } - m_mode = adios2::Mode::Append; m_writer.BeginStep(); m_writer.Put(m_io.InquireVariable("Step"), &tstep); m_writer.Put(m_io.InquireVariable("Time"), &time); } - void Writer::endWriting() { + void Writer::endWriting(WriteModeTags write_mode) { + raise::ErrorIf(write_mode == WriteMode::None, "None is not a valid mode", HERE); raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - if (!m_writing_mode) { + if (m_active_mode == WriteMode::None) { raise::Fatal("Not writing", HERE); } - m_writing_mode = false; + if (m_active_mode != write_mode) { + raise::Fatal("Writing mode mismatch", HERE); + } + m_active_mode = WriteMode::None; m_writer.EndStep(); m_writer.Close(); - p_adios->EnterComputationBlock(); } template void Writer::writeField(const std::vector&, diff --git a/src/output/writer.h b/src/output/writer.h index 566da44b2..a8abf4b12 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -36,6 +36,8 @@ namespace out { adios2::Engine m_writer; adios2::Mode m_mode { adios2::Mode::Write }; + bool m_separate_files; + // global shape of the fields array to output std::vector m_flds_g_shape; // local corner of the fields array to output @@ -63,7 +65,7 @@ namespace out { std::vector m_prtl_writers; std::vector m_spectra_writers; - bool m_writing_mode { false }; + WriteModeTags m_active_mode { WriteMode::None }; public: Writer() {} @@ -72,7 +74,7 @@ namespace out { Writer(Writer&&) = default; - void init(adios2::ADIOS*, const std::string&, const std::string&); + void init(adios2::ADIOS*, const std::string&, const std::string&, bool); void setMode(adios2::Mode); @@ -106,8 +108,8 @@ namespace out { void writeSpectrum(const array_t&, const std::string&); void writeSpectrumBins(const array_t&, const std::string&); - void beginWriting(std::size_t, long double); - void endWriting(); + void 
beginWriting(WriteModeTags, std::size_t, long double); + void endWriting(WriteModeTags); /* getters -------------------------------------------------------------- */ auto fname() const -> const std::string& { From 7c940f7586b95173546106622297b85bb9c01cec Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 11 Nov 2024 19:37:57 -0500 Subject: [PATCH 046/124] 0 particle case in timers --- src/global/utils/timer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index b5f4408ca..249feb6f5 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -116,11 +116,12 @@ namespace timer { (timer.second / local_tot) * 100.0); timer_stats.insert( { name, - std::make_tuple(timer.second, - timer.second / static_cast(npart), - timer.second / static_cast(ncells), - pcent, - 0u) }); + std::make_tuple( + timer.second, + npart > 0 ? timer.second / static_cast(npart) : 0.0, + timer.second / static_cast(ncells), + pcent, + 0u) }); } timer_stats.insert({ "Total", std::make_tuple(local_tot, 0.0, 0.0, 100u, 0u) }); #endif From c1f5ab67174a2b075f8f335647b600fb3a578216 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 30 Nov 2024 19:21:08 -0500 Subject: [PATCH 047/124] benchmark flow --- CMakeLists.txt | 3 +++ benchmark/benchmark.cpp | 35 +++++++++++++++++++++++++++++++++++ cmake/benchmark.cmake | 24 ++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 benchmark/benchmark.cpp create mode 100644 cmake/benchmark.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ee00d1b4..2618a0cb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,6 +134,9 @@ link_libraries(${DEPENDENCIES}) if(TESTS) # ---------------------------------- Tests --------------------------------- # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests.cmake) +elseif(BENCHMARK) + # ------------------------------ Benchmark --------------------------------- # + 
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/benchmark.cmake) else() # ----------------------------------- GUI ---------------------------------- # if(${gui}) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 100644 index 000000000..b5a7631c4 --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,35 @@ +#include "enums.h" +#include "global.h" + +#include "framework/containers/particles.h" + +auto main(int argc, char* argv[]) -> int { + ntt::GlobalInitialize(argc, argv); + // auto species = ntt::ParticleSpecies(1u, + // "test_e", + // 1.0f, + // 1.0f, + // 10000000, + // ntt::PrtlPusher::BORIS, + // false, + // ntt::Cooling::NONE); + ntt::GlobalFinalize(); + // * @param global_ndomains total number of domains + // * @param global_decomposition decomposition of the global domain + // * @param global_ncells number of cells in each dimension + // * @param global_extent physical extent of the global domain + // * @param global_flds_bc boundary conditions for fields + // * @param global_prtl_bc boundary conditions for particles + // * @param metric_params parameters for the metric + // * @param species_params parameters for the particle species + // Metadomain(unsigned int, + // const std::vector&, + // const std::vector&, + // const boundaries_t&, + // const boundaries_t&, + // const boundaries_t&, + // const std::map&, + // const std::vector&); + + return 0; +} diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake new file mode 100644 index 000000000..d2e8ca47c --- /dev/null +++ b/cmake/benchmark.cmake @@ -0,0 +1,24 @@ +set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_subdirectory(${SRC_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) +add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) +add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) +add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) +add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) + 
+if(${output}) + add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) + add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) +endif() + +set(exec benchmark.xc) +set(src ${CMAKE_CURRENT_SOURCE_DIR}/benchmark/benchmark.cpp) + +add_executable(${exec} ${src}) + +set(libs ntt_global ntt_metrics ntt_kernels ntt_archetypes ntt_framework) +if(${output}) + list(APPEND libs ntt_output ntt_checkpoint) +endif() +add_dependencies(${exec} ${libs}) +target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) From 553bcae7d6fff7980815b1f510a5fef7271dd3db Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 2 Dec 2024 16:37:34 -0500 Subject: [PATCH 048/124] added new particle array --- src/framework/containers/particles.cpp | 9 +++++++++ src/framework/containers/particles.h | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index f0c64c4ee..c7f8f3b7c 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,6 +47,9 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); + tag_offset = array_t { label + "_tag_offset", ntags() }; + tag_offset_h = Kokkos::create_mirror_view(tag_offset); + for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -98,7 +101,13 @@ namespace ntt { std::vector npart_tag_vec; for (std::size_t t { 0 }; t < ntags(); ++t) { npart_tag_vec.push_back(npart_tag_host(t)); + tag_offset_h(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; + } + for (std::size_t t { 0 }; t < ntags(); ++t) { + tag_offset_h(t) += (t > 0) ? 
tag_offset_h(t - 1) : 0; } + // Copy to device + Kokkos::deep_copy(tag_offset, tag_offset_h); return npart_tag_vec; } diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index b4831b64a..7496db78c 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -60,6 +60,8 @@ namespace ntt { array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles array_t tag; + // Array to store the cumulative number of particles per tag + array_t tag_offset; // Array to store the particle load std::vector> pld; // phi coordinate (for axisymmetry) @@ -72,6 +74,7 @@ namespace ntt { array_mirror_t weight_h; array_mirror_t phi_h; array_mirror_t tag_h; + array_mirror_t tag_offset_h; std::vector> pld_h; // for empty allocation @@ -178,6 +181,7 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); + footprint += sizeof(int) * tag_offset.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } From cffc563f664aeaceebe2e611945fb6852f9052e8 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 2 Dec 2024 16:37:56 -0500 Subject: [PATCH 049/124] added new sendbuffer function --- src/framework/domain/comm_mpi.hpp | 144 +++++++++++++++++ src/framework/domain/communications.cpp | 195 ++++++++++++++++++++++++ src/framework/domain/metadomain.h | 1 + 3 files changed, 340 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 63dd8271a..2067ab9a4 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -316,6 +316,7 @@ namespace comm { } } + void ParticleSendRecvCount(int send_rank, int recv_rank, const std::size_t& send_count, @@ -441,6 +442,149 @@ namespace comm { return recv_count; } + + template + void CommunicateParticleQuantityBuffer( array_t& arr, + int send_rank, + int recv_rank, + const 
range_tuple_t& send_slice, + const range_tuple_t& recv_slice, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate) { + + array_t buffer( "buffer", indices_to_send.size() + + indices_to_allocate.size()); + // Populate the buffer for particle array + Kokkos::parallel_for( + "PopulateBuffer", + Kokkos::RangePolicy(0, indices_to_send.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_send(i); + buffer(i) = arr(idx); + }); + CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); + // Populate from buffer to the particle array + Kokkos::parallel_for( + "PopulateFromBuffer", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + arr(idx) = buffer(indices_to_send.size() + i); + }); + return; + } + + + template + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate, + int send_rank, + int recv_rank, + std::vector shifts_in_x){ + if ((send_rank < 0) && (recv_rank < 0)) { + raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); + } + // Construct send and receive slice for the buffer + auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); + auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + + indices_to_allocate.size() }); + // Send and receive the particles + CommunicateParticleQuantityBuffer(species.i1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + if constexpr (D == 
Dim::_2D || D == Dim::_3D) { + CommunicateParticleQuantityBuffer(species.i2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + if constexpr (D == Dim::_3D) { + CommunicateParticleQuantityBuffer(species.i3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + CommunicateParticleQuantityBuffer(species.ux1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.weight, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + CommunicateParticleQuantityBuffer(species.phi, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + for (auto p { 0 }; p < species.npld(); ++p) { + CommunicateParticleQuantityBuffer(species.pld[p], send_rank, 
recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + // Set the tag for the received particles to be alive and perform the necessary displacements + auto& this_tag = species.tag; + + if constexpr (D == Dim::_1D) + { + const auto shift_in_x1 = shifts_in_x[0]; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i1_prev(idx) += shift_in_x1; + }); + } + + else if constexpr (D == Dim::_2D) + { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + }); + } + + else if constexpr (D == Dim::_3D) + { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + const auto shift_in_x3 = shifts_in_x[2]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i3 = species.i3; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + auto& this_i3_prev = species.i3_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i3(idx) += shift_in_x3; + 
this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + this_i3_prev(idx) += shift_in_x3; + }); + } + return; + } + + } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 60524eedd..f484f664d 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -646,6 +646,201 @@ namespace ntt { } } + +/* + New function to communicate particles using a buffer +*/ +template + void Metadomain::CommunicateParticlesBuffer(Domain& domain, + timer::Timers* timers) { + raise::ErrorIf(timers == nullptr, + "Timers not passed when Comm::Prtl called", + HERE); + logger::Checkpoint("Communicating particles\n", HERE); + for (auto& species : domain.species) { + const auto npart_per_tag_arr = species.npart_per_tag(); + const auto tag_offset = species.tag_offset_h; + auto index_last = tag_offset[tag_offset.size() - 1] + + npart_per_tag_arr[npart_per_tag_arr.size() - 1]; + std::vector send_ranks, send_inds; + std::vector recv_ranks, recv_inds; + // at this point particles should already by tagged in the pusher +#if defined(MPI_ENABLED) + timers->start("Communications_sendrecv"); + // array that holds the number of particles to be received per tag + std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); + std::size_t total_recv_count = 0; + const std::size_t total_send_count = species.npart() - npart_per_tag_arr[ParticleTag::alive]; + for (auto& direction : dir::Directions::all) { + const auto [send_params, + recv_params] = GetSendRecvParams(this, domain, direction, true); + const auto [send_indrank, send_slice] = send_params; + const auto [recv_indrank, recv_slice] = recv_params; + const auto [send_ind, send_rank] = send_indrank; + const auto [recv_ind, recv_rank] = recv_indrank; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); + const auto nsend 
= npart_per_tag_arr[send_dir_tag]; + std::size_t nrecv = 0; + // Get the receive count + send_ranks.push_back(send_rank); + recv_ranks.push_back(recv_rank); + send_inds.push_back(send_ind); + recv_inds.push_back(recv_ind); + comm::ParticleSendRecvCount(send_rank, + recv_rank, + nsend, + nrecv); + total_recv_count += nrecv; + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; + } + timers->stop("Communications_sendrecv"); + raise::FatalIf((index_last + total_recv_count) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + // Now we know the number of particles to be sent and received per direction + /* permute vector contains the indices of the tags to send and receive + in the order of the directions + E.g., consider the following tag array + [ 0, 0, 3, 0, 1,...] + Then, permute vector will look something like + [0, 1, 3, ..., 2, ..., 4, ... ] + |<--------- >| |<----->| |<----->| .... + tag=0 ct tag=1 ct tag=3 ct + (dead) (alive) (tag1) ... + */ + timers->start("PermuteVector"); + auto& this_tag = species.tag; + auto& this_tag_offset = species.tag_offset; + Kokkos::View permute_vector("permute_vector", species.npart()); + Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::parallel_for( + "PermuteVector", + species.npart(), + Lambda(const std::size_t p) { + auto current_tag = this_tag(p); + auto idx_permute_vec = this_tag_offset(current_tag) + current_offset(current_tag); + Kokkos::atomic_fetch_add(&current_offset(current_tag), 1); + permute_vector(idx_permute_vec) = static_cast(p); + }); + timers->stop("PermuteVector"); + + // allocation_vector(p) assigns the pth received particle + // to the pth hole in the array, or after npart() if p > sent+dead count. 
+ Kokkos::View allocation_vector("allocation_vector", total_recv_count); + auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); + std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; + std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; + std::size_t n_holes = species.npart() - n_alive; + + timers->start("AllocationVector"); + Kokkos::parallel_for( + "AllocationVector", + total_recv_count, + Lambda(const std::size_t p) { + // Case: recevied particle count less than dead particle count -> replace dead particles + if (p < n_dead){ + allocation_vector(p) = permute_vector(p); + } + // Case: received particle count > dead particle count but < sent particle count -> replace + // sent particles + else if (p <= n_holes){ + allocation_vector(p) = permute_vector(n_alive + p); + } + // Case: received particle count exceeds sent + dead particles -> append at the end + else { + allocation_vector(p) = static_cast(index_last + (p - n_holes)); + } + }); + Kokkos::deep_copy(allocation_vector_h, allocation_vector); + timers->stop("AllocationVector"); + + std::size_t count_recv = 0; + std::size_t iteration = 0; + for (auto& direction : dir::Directions::all) { + // Get the coordinate shifts in xi + std::vector shifts_in_x; + auto recv_ind = recv_inds[iteration]; + if constexpr (D == Dim::_1D) { + int shift_in_x1 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active(in::x1); + } + shifts_in_x.push_back(shift_in_x1); + } + else if constexpr (D == Dim::_2D) { + int shift_in_x1 { 0 }, shift_in_x2 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active()[0]; + } + if ((-direction)[1] == -1) { + shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + } else if ((-direction)[1] == 1) { + shift_in_x2 = 
domain.mesh.n_active(in::x2); + } + shifts_in_x.push_back(shift_in_x1); + shifts_in_x.push_back(shift_in_x2); + } + else if constexpr (D == Dim::_3D) { + int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active(in::x1); + } + if ((-direction)[1] == -1) { + shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + } else if ((-direction)[1] == 1) { + shift_in_x2 = domain.mesh.n_active(in::x2); + } + if ((-direction)[2] == -1) { + shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); + } else if ((-direction)[2] == 1) { + shift_in_x3 = domain.mesh.n_active(in::x3); + } + shifts_in_x.push_back(shift_in_x1); + shifts_in_x.push_back(shift_in_x2); + shifts_in_x.push_back(shift_in_x3); + } + + auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); + + auto range_allocate = std::make_pair(static_cast(allocation_vector_h(count_recv)), + static_cast(allocation_vector_h(count_recv) + + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + + // contains the indices of the holes where the received particles will be placed + auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); + // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) + auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + + // Main function that sends the particles and receives the arrays + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; + comm::CommunicateParticlesBuffer( species, + indices_to_send, + indices_to_allocate, + send_rank, + recv_rank, + shifts_in_x); + count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; + 
iteration++; + } + species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); +#endif + } + } + + + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 7b3042b5b..e30bc8e97 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -89,6 +89,7 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); void CommunicateParticles(Domain&, timer::Timers*); + void CommunicateParticlesBuffer(Domain&, timer::Timers*); /** * @param global_ndomains total number of domains From 3c55ee7e4fc6255a2dcc772ab75b067df11281b1 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 15:57:16 -0500 Subject: [PATCH 050/124] fixed bug in CommunicateParticlesBuffer and created metadomain object in benchmark.cpp --- benchmark/benchmark.cpp | 183 ++++++++++++++++++++---- src/framework/domain/comm_mpi.hpp | 15 +- src/framework/domain/communications.cpp | 44 +++--- 3 files changed, 193 insertions(+), 49 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index b5a7631c4..5ab9124f0 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,35 +1,170 @@ #include "enums.h" #include "global.h" +#include "utils/timer.h" +#include "utils/error.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" #include "framework/containers/particles.h" +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" +#include "arch/mpi_tags.h" + +#include + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object + - Create particle array + - Initialize the position and velocities of the particles + - Set a large timestep (see where that is set) + - Make a loop of N iterations, where the positions of particles is 
sorted + and pushed + - Check if the particle tags are correct after each iteration + - Compute the time taken for best of N iterations for the communication + */ + + +/* + Structure of the 2D domain + ---------------------------------- (3,3) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- (3,2) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- (3,1) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- + (0,0) (1,0) (2,0) (3,0) +*/ + +/* + Function to check the tags of a domain object to make sure that + all the tags are alive. If the tags are not alive then the function + prints the tag count for each of the particles along with the rank + of the domain. +*/ +template +void CheckDomainTags(Domain& domain, + timer::Timers* timers) +{ + bool all_alive = true; + bool no_dead_particles = true; + bool tag_check = true; + for (auto& species : domain.species) { + std::cout << "Checking domain tags for species: " << species.label << std::endl; + const auto npart_per_tag_arr = species.npart_per_tag(); + const auto npart = species.npart(); + if (npart != npart_per_tag_arr[ParticleTag::alive]){ + all_alive = false; + } + for (std::size_t i = 0; i < npart_per_tag_arr.size(); ++i) { + if (i == ParticleTag::alive) { + continue; + } + if (npart_per_tag_arr[i] != 0) { + no_dead_particles = false; + } + } + auto this_tag = species.tag; + Kokkos::parallel_for("CheckTags", + npart, Lambda(const std::size_t i) { + if (this_tag(i) != ParticleTag::alive) { + tag_check = false; + } + }); + raise::ErrorIf(all_alive == false, + "Array contains particles with tags other than alive", + HERE); + raise::ErrorIf(no_dead_particles == false, + "Array contains dead particles", + HERE); + raise::ErrorIf(tag_check == false, + "Tag check failed", + HERE); + } + return; +} + + auto main(int argc, char* argv[]) -> int { + std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); - // auto 
species = ntt::ParticleSpecies(1u, - // "test_e", - // 1.0f, - // 1.0f, - // 10000000, - // ntt::PrtlPusher::BORIS, - // false, - // ntt::Cooling::NONE); + // Create a Metadomain object + const unsigned int ndomains = 9; + const std::vector global_decomposition = {-1, -1}; + const std::vector global_ncells = {32, 32}; + const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; + const boundaries_t global_flds_bc = {{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; + const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const std::map metric_params = {}; + const int npart = 10000; + auto species = ntt::Particles(1u, + "test_e", + 1.0f, + 1.0f, + npart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain> + ( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + {species} + ); + // Get the pointers to all the subdomains + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + + // Set the positions of the particles in each domain + for (auto& species : local_domain->species) + { + auto tag = ParticleTag::alive; + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_tag = species.tag; + Kokkos::parallel_for("SetPositions", + species.npart(), Lambda(const std::size_t i) { + this_i1(i) = 1; + this_i2(i) = 1; + this_dx1(i) = 0.01; + this_dx2(i) = 0.01; + this_ux1(i) = 0.5; + this_ux2(i) = 0.5; + this_tag(i) = tag; + }); + } + + + // Print the number of particles per domain + std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << 
local_domain->species[0].npart() << std::endl; + // Print the position of the 5 particles in the domain + ntt::GlobalFinalize(); - // * @param global_ndomains total number of domains - // * @param global_decomposition decomposition of the global domain - // * @param global_ncells number of cells in each dimension - // * @param global_extent physical extent of the global domain - // * @param global_flds_bc boundary conditions for fields - // * @param global_prtl_bc boundary conditions for particles - // * @param metric_params parameters for the metric - // * @param species_params parameters for the particle species - // Metadomain(unsigned int, - // const std::vector&, - // const std::vector&, - // const boundaries_t&, - // const boundaries_t&, - // const boundaries_t&, - // const std::map&, - // const std::vector&); + + std::cout << "Terminating" << std::endl; return 0; } diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2067ab9a4..ed73302b2 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -452,13 +452,13 @@ namespace comm { Kokkos::View indices_to_send, Kokkos::View indices_to_allocate) { - array_t buffer( "buffer", indices_to_send.size() + - indices_to_allocate.size()); + array_t buffer( "buffer", indices_to_send.extent(0) + + indices_to_allocate.extent(0)); // Populate the buffer for particle array Kokkos::parallel_for( "PopulateBuffer", - Kokkos::RangePolicy(0, indices_to_send.size()), - KOKKOS_LAMBDA(const size_t i) { + indices_to_send.extent(0), + Lambda(const size_t i) { const auto idx = indices_to_send(i); buffer(i) = arr(idx); }); @@ -466,15 +466,14 @@ namespace comm { // Populate from buffer to the particle array Kokkos::parallel_for( "PopulateFromBuffer", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { + indices_to_allocate.extent(0), + Lambda(const size_t i) { const auto idx = indices_to_allocate(i); - arr(idx) = 
buffer(indices_to_send.size() + i); + arr(idx) = buffer(indices_to_send.extent(0) + i); }); return; } - template void CommunicateParticlesBuffer(Particles& species, Kokkos::View indices_to_send, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index f484f664d..4ad29a327 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -660,7 +660,7 @@ template for (auto& species : domain.species) { const auto npart_per_tag_arr = species.npart_per_tag(); const auto tag_offset = species.tag_offset_h; - auto index_last = tag_offset[tag_offset.size() - 1] + + auto index_last = tag_offset[tag_offset.extent(0) - 1] + npart_per_tag_arr[npart_per_tag_arr.size() - 1]; std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -668,9 +668,11 @@ template #if defined(MPI_ENABLED) timers->start("Communications_sendrecv"); // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); + std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); std::size_t total_recv_count = 0; - const std::size_t total_send_count = species.npart() - npart_per_tag_arr[ParticleTag::alive]; + const std::size_t total_send_count = species.npart() - + npart_per_tag_arr[ParticleTag::alive] - + npart_per_tag_arr[ParticleTag::dead]; for (auto& direction : dir::Directions::all) { const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); @@ -714,8 +716,10 @@ template timers->start("PermuteVector"); auto& this_tag = species.tag; auto& this_tag_offset = species.tag_offset; - Kokkos::View permute_vector("permute_vector", species.npart()); - Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::View permute_vector("permute_vector", species.npart()); + // Current offset is a helper array used to create permute vector + // It stores the number of particles of a given tag type stored during 
the loop + Kokkos::View current_offset("current_offset", species.ntags()); Kokkos::parallel_for( "PermuteVector", species.npart(), @@ -729,7 +733,7 @@ template // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count. - Kokkos::View allocation_vector("allocation_vector", total_recv_count); + Kokkos::View allocation_vector("allocation_vector", total_recv_count); auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; @@ -759,7 +763,15 @@ template std::size_t count_recv = 0; std::size_t iteration = 0; + // Main loop over all direction where we send the data for (auto& direction : dir::Directions::all) { + // When nowhere to send and receive + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; + + if (send_rank < 0 and recv_rank < 0) { + continue; + } // Get the coordinate shifts in xi std::vector shifts_in_x; auto recv_ind = recv_inds[iteration]; @@ -809,22 +821,20 @@ template shifts_in_x.push_back(shift_in_x3); } - auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) + auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); - - auto range_allocate = std::make_pair(static_cast(allocation_vector_h(count_recv)), - static_cast(allocation_vector_h(count_recv) + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); - - // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = 
Kokkos::subview(allocation_vector, range_allocate); + // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) + auto range_allocate = std::make_pair(static_cast(count_recv), + static_cast(count_recv + + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + // contains the indices of the holes where the received particles will be placed + auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); // Main function that sends the particles and receives the arrays - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; comm::CommunicateParticlesBuffer( species, indices_to_send, indices_to_allocate, From 5f1fe4a46881b4b28262e7d439a0da15600b9324 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 16:35:07 -0500 Subject: [PATCH 051/124] Printing nparticles per domain --- benchmark/benchmark.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 5ab9124f0..4a8923027 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -128,7 +128,11 @@ auto main(int argc, char* argv[]) -> int { {species} ); // Get the pointers to all the subdomains - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + //int rank; + //MPI_Comm_rank(MPI_COMM_WORLD, &rank); + for (int i=0; i int { species.npart(), Lambda(const std::size_t i) { this_i1(i) = 1; this_i2(i) = 1; + this_i3(i) = 0; this_dx1(i) = 0.01; this_dx2(i) = 0.01; this_ux1(i) = 0.5; @@ -157,11 +162,17 @@ auto main(int argc, char* argv[]) -> int { }); } - + // Get and print the extent of each domain + std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", + local_domain->mesh.extent(in::x1).first, + 
local_domain->mesh.extent(in::x1).second); + std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", + local_domain->mesh.extent(in::x2).first, + local_domain->mesh.extent(in::x2).second); // Print the number of particles per domain std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; // Print the position of the 5 particles in the domain - + } ntt::GlobalFinalize(); std::cout << "Terminating" << std::endl; From 5b14e3474ec4f9494db572cdaf22add4d2e4768f Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 18:45:56 -0500 Subject: [PATCH 052/124] Printing particle count in domain --- benchmark/benchmark.cpp | 150 ++++++++++++++++++++++++++-------------- 1 file changed, 97 insertions(+), 53 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 4a8923027..5eebb4d2d 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,7 +1,6 @@ #include "enums.h" #include "global.h" -#include "utils/timer.h" #include "utils/error.h" #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" @@ -11,6 +10,15 @@ #include "arch/mpi_tags.h" #include +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast(stop_##label - start_##label).count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" << std::endl; /* Test to check the performance of the new particle allocation scheme @@ -53,14 +61,12 @@ of the domain. 
*/ template -void CheckDomainTags(Domain& domain, - timer::Timers* timers) +void CheckDomainTags(Domain& domain) { bool all_alive = true; bool no_dead_particles = true; - bool tag_check = true; for (auto& species : domain.species) { - std::cout << "Checking domain tags for species: " << species.label << std::endl; + std::cout << "Checking domain tags for species: " << species.label() << std::endl; const auto npart_per_tag_arr = species.npart_per_tag(); const auto npart = species.npart(); if (npart != npart_per_tag_arr[ParticleTag::alive]){ @@ -74,26 +80,33 @@ void CheckDomainTags(Domain& domain, no_dead_particles = false; } } - auto this_tag = species.tag; - Kokkos::parallel_for("CheckTags", - npart, Lambda(const std::size_t i) { - if (this_tag(i) != ParticleTag::alive) { - tag_check = false; - } - }); + raise::ErrorIf(all_alive == false, "Array contains particles with tags other than alive", HERE); raise::ErrorIf(no_dead_particles == false, "Array contains dead particles", HERE); - raise::ErrorIf(tag_check == false, - "Tag check failed", - HERE); + //raise::ErrorIf(tag_check_h(0) == false, + // "Tag check failed", + // HERE); } return; } +void InitializePositionsDomain(Domain>& domain) +{ + for (auto& species : domain.species) { + TIMER_START(Sorting_timer); + species.SortByTags(); + TIMER_STOP(Sorting_timer); + species.SyncHostDevice(); + std::cout << "Number of particles in domain: " << species.npart() << std::endl; + //std::cout << "Extent of i1" << species.i1.extent(0) << std::endl; + } + CheckDomainTags(domain); +} + auto main(int argc, char* argv[]) -> int { @@ -101,44 +114,23 @@ auto main(int argc, char* argv[]) -> int { ntt::GlobalInitialize(argc, argv); // Create a Metadomain object const unsigned int ndomains = 9; - const std::vector global_decomposition = {-1, -1}; + const std::vector global_decomposition = {{}}; const std::vector global_ncells = {32, 32}; const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; const boundaries_t global_flds_bc = 
{{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; const std::map metric_params = {}; - const int npart = 10000; + const int maxnpart = 1000; auto species = ntt::Particles(1u, "test_e", 1.0f, 1.0f, - npart, + maxnpart, ntt::PrtlPusher::BORIS, false, ntt::Cooling::NONE); - auto metadomain = Metadomain> - ( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - {species} - ); - // Get the pointers to all the subdomains - //int rank; - //MPI_Comm_rank(MPI_COMM_WORLD, &rank); - for (int i=0; ispecies) - { - auto tag = ParticleTag::alive; + species.set_npart(maxnpart); auto &this_i1 = species.i1; auto &this_i2 = species.i2; auto &this_i3 = species.i3; @@ -149,30 +141,82 @@ auto main(int argc, char* argv[]) -> int { auto &this_ux2 = species.ux2; auto &this_ux3 = species.ux3; auto &this_tag = species.tag; + + std::cout << "Species particle count is " << species.npart() << std::endl; Kokkos::parallel_for("SetPositions", species.npart(), Lambda(const std::size_t i) { this_i1(i) = 1; this_i2(i) = 1; - this_i3(i) = 0; + this_i3(i) = 1; this_dx1(i) = 0.01; this_dx2(i) = 0.01; - this_ux1(i) = 0.5; - this_ux2(i) = 0.5; - this_tag(i) = tag; + this_ux1(i) = 0.; + this_ux2(i) = 0.; + this_ux3(i) = 0.; + this_tag(i) = 1; }); - } + Kokkos::fence(); + std::cout << "Species set " << species.npart() << std::endl; + auto metadomain = Metadomain> + ( ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + {species} + ); + + //metadomain.runOnLocalDomains([&](auto& loc_dom) { + // InitializePositionsDomain(loc_dom); + //}); + + // Get the pointer to the subdomain + //const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + //auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + + // Set 
the positions of the particles in each domain + //for (auto& species : local_domain->species) + //{ + // auto tag = ParticleTag::alive; + // auto &this_i1 = species.i1; + // auto &this_i2 = species.i2; + // auto &this_i3 = species.i3; + // auto &this_dx1 = species.dx1; + // auto &this_dx2 = species.dx2; + // auto &this_dx3 = species.dx3; + // auto &this_ux1 = species.ux1; + // auto &this_ux2 = species.ux2; + // auto &this_ux3 = species.ux3; + // auto &this_tag = species.tag; + // Kokkos::parallel_for("SetPositions", + // species.npart(), Lambda(const std::size_t i) { + // this_i1(i) = 1; + // this_i2(i) = 1; + // this_i3(i) = 0; + // this_dx1(i) = 0.01; + // this_dx2(i) = 0.01; + // this_ux1(i) = 0.5; + // this_ux2(i) = 0.5; + // this_tag(i) = tag; + // }); +// + //species.SortByTags(); + //species.SyncHostDevice(); + //} // Get and print the extent of each domain - std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", - local_domain->mesh.extent(in::x1).first, - local_domain->mesh.extent(in::x1).second); - std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", - local_domain->mesh.extent(in::x2).first, - local_domain->mesh.extent(in::x2).second); + //std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", + // local_domain->mesh.extent(in::x1).first, + // local_domain->mesh.extent(in::x1).second); + //std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", + // local_domain->mesh.extent(in::x2).first, + // local_domain->mesh.extent(in::x2).second); // Print the number of particles per domain - std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; + //std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; // Print the position of the 5 particles in the domain - } + ntt::GlobalFinalize(); std::cout << "Terminating" << std::endl; From 1b0993d7b475a0be005686cbd1516f13a7b762e5 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: 
Fri, 6 Dec 2024 15:13:14 -0500 Subject: [PATCH 053/124] benchmark/benchmark.cpp fixed benchmark.cpp --- src/framework/containers/particles.cpp | 3 +++ src/framework/domain/comm_mpi.hpp | 2 -- src/framework/domain/communications.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index c7f8f3b7c..c97f8da2d 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -85,6 +85,9 @@ namespace ntt { auto this_tag = tag; array_t npart_tag("npart_tags", ntags()); + // Print tag_h array + auto tag_host = Kokkos::create_mirror_view(tag); + Kokkos::deep_copy(tag_host, tag); auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); Kokkos::parallel_for( "NpartPerTag", diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ed73302b2..d29a5758b 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -363,12 +363,10 @@ namespace comm { recv_rank, send_slice.second - send_slice.first, recv_count); - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), "Too many particles to receive (cannot fit into maxptl)", HERE); const auto recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); CommunicateParticleQuantity(species.i1_prev, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 4ad29a327..cdd32aed6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -631,7 +631,6 @@ namespace ntt { index_last += recv_count; species.set_npart(index_last); } - Kokkos::deep_copy( Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), ParticleTag::dead); @@ -844,6 
+843,7 @@ template count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } + // If receive count is less than send count then make the tags of sent dead ? Ask Hayk species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); #endif } From 320292467f50dbd31ceb4036ab7a9489765b7420 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 12 Dec 2024 16:48:27 -0500 Subject: [PATCH 054/124] create mirror views for MPISendRecv in comm_mpi --- src/framework/domain/comm_mpi.hpp | 41 ++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index d29a5758b..33431cfe7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -47,15 +47,19 @@ namespace comm { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + raise::ErrorIf( (send_rank == rank && send_idx != idx) || (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); + if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self + if (not additive) { + // simply filling the ghost cells if constexpr (D == Dim::_1D) { Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), @@ -65,6 +69,7 @@ namespace comm { Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy( Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); @@ -177,13 +182,19 @@ namespace comm { comps.second - comps.first); } } + + auto send_fld_h = Kokkos::create_mirror_view(send_fld); + auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); + Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - 
MPI_Sendrecv(send_fld.data(), + // Segfault here: print mpi params + // Create host views + MPI_Sendrecv(send_fld_h.data(), nsend, mpi::get_type(), send_rank, 0, - recv_fld.data(), + recv_fld_h.data(), nrecv, mpi::get_type(), recv_rank, @@ -191,14 +202,16 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (send_rank >= 0) { - MPI_Send(send_fld.data(), + MPI_Send(send_fld_h.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); + } else if (recv_rank >= 0) { - MPI_Recv(recv_fld.data(), + auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); + MPI_Recv(recv_fld_h.data(), nrecv, mpi::get_type(), recv_rank, @@ -208,7 +221,10 @@ namespace comm { } else { raise::Error("CommunicateField called with negative ranks", HERE); } + Kokkos::deep_copy(recv_fld, recv_fld_h); + if (recv_rank >= 0) { + // !TODO: perhaps directly recv to the fld? if (not additive) { if constexpr (D == Dim::_1D) { @@ -282,16 +298,18 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { + auto array_h = Kokkos::create_mirror_view(arr); + Kokkos::deep_copy(array_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, + MPI_Sendrecv(array_h.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - arr.data() + recv_slice.first, + array_h.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -299,14 +317,14 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, + MPI_Send(array_h.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, + MPI_Recv(array_h.data() + 
recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -314,6 +332,7 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } + Kokkos::deep_copy(arr, array_h); } @@ -457,8 +476,7 @@ namespace comm { "PopulateBuffer", indices_to_send.extent(0), Lambda(const size_t i) { - const auto idx = indices_to_send(i); - buffer(i) = arr(idx); + buffer(i) = arr(indices_to_send(i)); }); CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); // Populate from buffer to the particle array @@ -466,8 +484,7 @@ namespace comm { "PopulateFromBuffer", indices_to_allocate.extent(0), Lambda(const size_t i) { - const auto idx = indices_to_allocate(i); - arr(idx) = buffer(indices_to_send.extent(0) + i); + arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); }); return; } From 831e7d9cc12e3f484e14c09e3362dfbf00049a39 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 12 Dec 2024 16:49:43 -0500 Subject: [PATCH 055/124] fixed function to time old and new communication routines --- benchmark/benchmark.cpp | 282 +++++++++++++++++----------------------- 1 file changed, 116 insertions(+), 166 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 5eebb4d2d..6bfe5c7c7 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -5,11 +5,11 @@ #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" #include "framework/containers/particles.h" +#include "framework/domain/communications.cpp" #include "metrics/metric_base.h" #include "metrics/minkowski.h" -#include "arch/mpi_tags.h" - #include + #define TIMER_START(label) \ Kokkos::fence(); \ auto start_##label = std::chrono::high_resolution_clock::now(); @@ -22,105 +22,88 @@ /* Test to check the performance of the new particle allocation scheme - - Create a metadomain object - - Create particle array - - Initialize the position and velocities of the particles - - Set a large timestep (see where that is set) - - Make a loop of N 
iterations, where the positions of particles is sorted - and pushed - - Check if the particle tags are correct after each iteration + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - Communicate particles to neighbors and time the communication - Compute the time taken for best of N iterations for the communication */ - -/* - Structure of the 2D domain - ---------------------------------- (3,3) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- (3,2) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- (3,1) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- - (0,0) (1,0) (2,0) (3,0) -*/ - -/* - Function to check the tags of a domain object to make sure that - all the tags are alive. If the tags are not alive then the function - prints the tag count for each of the particles along with the rank - of the domain. 
-*/ +// Set npart and set the particle tags to alive template -void CheckDomainTags(Domain& domain) -{ - bool all_alive = true; - bool no_dead_particles = true; - for (auto& species : domain.species) { - std::cout << "Checking domain tags for species: " << species.label() << std::endl; - const auto npart_per_tag_arr = species.npart_per_tag(); - const auto npart = species.npart(); - if (npart != npart_per_tag_arr[ParticleTag::alive]){ - all_alive = false; - } - for (std::size_t i = 0; i < npart_per_tag_arr.size(); ++i) { - if (i == ParticleTag::alive) { - continue; - } - if (npart_per_tag_arr[i] != 0) { - no_dead_particles = false; +void InitializeParticleArrays(Domain &domain, const int npart){ + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto &this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) + { + this_tag(i) = ParticleTag::alive; } - } - - raise::ErrorIf(all_alive == false, - "Array contains particles with tags other than alive", - HERE); - raise::ErrorIf(no_dead_particles == false, - "Array contains dead particles", - HERE); - //raise::ErrorIf(tag_check_h(0) == false, - // "Tag check failed", - // HERE); + ); } return; } -void InitializePositionsDomain(Domain>& domain) -{ - for (auto& species : domain.species) { - TIMER_START(Sorting_timer); - species.SortByTags(); - TIMER_STOP(Sorting_timer); - species.SyncHostDevice(); - std::cout << "Number of particles in domain: " << species.npart() << std::endl; - //std::cout << "Extent of i1" << species.i1.extent(0) << std::endl; +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain &domain, const double send_frac, + const int seed_ind, const int 
seed_tag){ + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, random_pool, 0, nparticles); + // Generate random tags to send + Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, random_pool_tag, 0, domain.species[i_spec].ntags()); + auto &this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) + { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + } + ); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); } - CheckDomainTags(domain); + return; } - - auto main(int argc, char* argv[]) -> int { std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); // Create a Metadomain object - const unsigned int ndomains = 9; - const std::vector global_decomposition = {{}}; - const std::vector global_ncells = {32, 32}; - const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; - const boundaries_t global_flds_bc = {{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; - const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const unsigned int ndomains = 1; + const std::vector global_decomposition = {{-1,-1, -1}}; + const std::vector global_ncells = {32, 32, 32}; + const 
boundaries_t global_extent = {{0.0, 3.0}, {0.0, 3.0}, {0.0, 3.0}}; + const boundaries_t global_flds_bc = { {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}}; + const boundaries_t global_prtl_bc = { {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; const std::map metric_params = {}; - const int maxnpart = 1000; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); auto species = ntt::Particles(1u, "test_e", 1.0f, @@ -129,35 +112,7 @@ auto main(int argc, char* argv[]) -> int { ntt::PrtlPusher::BORIS, false, ntt::Cooling::NONE); - - species.set_npart(maxnpart); - auto &this_i1 = species.i1; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_dx1 = species.dx1; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_tag = species.tag; - - std::cout << "Species particle count is " << species.npart() << std::endl; - Kokkos::parallel_for("SetPositions", - species.npart(), Lambda(const std::size_t i) { - this_i1(i) = 1; - this_i2(i) = 1; - this_i3(i) = 1; - this_dx1(i) = 0.01; - this_dx2(i) = 0.01; - this_ux1(i) = 0.; - this_ux2(i) = 0.; - this_ux3(i) = 0.; - this_tag(i) = 1; - }); - Kokkos::fence(); - std::cout << "Species set " << species.npart() << std::endl; - auto metadomain = Metadomain> + auto metadomain = Metadomain> ( ndomains, global_decomposition, global_ncells, @@ -168,58 +123,53 @@ auto main(int argc, char* argv[]) -> int { {species} ); - //metadomain.runOnLocalDomains([&](auto& loc_dom) { - // InitializePositionsDomain(loc_dom); - //}); - - // Get the pointer to the subdomain - //const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - //auto 
local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - - // Set the positions of the particles in each domain - //for (auto& species : local_domain->species) - //{ - // auto tag = ParticleTag::alive; - // auto &this_i1 = species.i1; - // auto &this_i2 = species.i2; - // auto &this_i3 = species.i3; - // auto &this_dx1 = species.dx1; - // auto &this_dx2 = species.dx2; - // auto &this_dx3 = species.dx3; - // auto &this_ux1 = species.ux1; - // auto &this_ux2 = species.ux2; - // auto &this_ux3 = species.ux3; - // auto &this_tag = species.tag; - // Kokkos::parallel_for("SetPositions", - // species.npart(), Lambda(const std::size_t i) { - // this_i1(i) = 1; - // this_i2(i) = 1; - // this_i3(i) = 0; - // this_dx1(i) = 0.01; - // this_dx2(i) = 0.01; - // this_ux1(i) = 0.5; - // this_ux2(i) = 0.5; - // this_tag(i) = tag; - // }); -// - //species.SortByTags(); - //species.SyncHostDevice(); - //} - - // Get and print the extent of each domain - //std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", - // local_domain->mesh.extent(in::x1).first, - // local_domain->mesh.extent(in::x1).second); - //std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", - // local_domain->mesh.extent(in::x2).first, - // local_domain->mesh.extent(in::x2).second); - // Print the number of particles per domain - //std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; - // Print the position of the 5 particles in the domain - - ntt::GlobalFinalize(); - - std::cout << "Terminating" << std::endl; - + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers {{"Communication"}, nullptr, false}; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + for (int i = 
0; i < 10; ++i) { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast(stop_new - start_new).count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast(stop_old - start_old).count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + std::cout << "Total time elapsed for old: " << total_time_elapsed_old << " microseconds" << std::endl; + std::cout << "Total time elapsed for new: " << total_time_elapsed_new << " microseconds" << std::endl; return 0; } + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] 
for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ \ No newline at end of file From 896a9570fa0347abd2e158167ec73189f2ff8541 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 16 Dec 2024 16:13:27 -0500 Subject: [PATCH 056/124] bug fix in comm --- src/framework/domain/comm_mpi.hpp | 13 +- src/framework/domain/communications.cpp | 193 +++++++++++++++++++----- 2 files changed, 165 insertions(+), 41 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 33431cfe7..2251968c4 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -499,6 +499,16 @@ namespace comm { if ((send_rank < 0) && (recv_rank < 0)) { raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); } + // First set the tags of the sent particles to be dead + auto& this_tag = species.tag; + //Kokkos::parallel_for( + //"SetTagDead", + //Kokkos::RangePolicy(0, indices_to_allocate.size()), + //KOKKOS_LAMBDA(const size_t i) { + // const auto idx = indices_to_send(i); + // this_tag(idx) = static_cast(ParticleTag::dead); + //}); + // Construct send and receive slice for the buffer auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + @@ -531,8 +541,6 @@ namespace comm { CommunicateParticleQuantityBuffer(species.pld[p], send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); } // Set the tag for the received particles to be alive and perform the necessary displacements - auto& this_tag = species.tag; - if constexpr (D == Dim::_1D) { const auto shift_in_x1 = shifts_in_x[0]; @@ -595,6 +603,7 @@ namespace comm { this_i3_prev(idx) += shift_in_x3; }); } + Kokkos::fence(); return; } diff --git a/src/framework/domain/communications.cpp 
b/src/framework/domain/communications.cpp index cdd32aed6..a43b635b7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -657,22 +657,26 @@ template HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - const auto npart_per_tag_arr = species.npart_per_tag(); - const auto tag_offset = species.tag_offset_h; - auto index_last = tag_offset[tag_offset.extent(0) - 1] + - npart_per_tag_arr[npart_per_tag_arr.size() - 1]; + auto npart_per_tag_arr = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast(npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast(npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); + std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; // at this point particles should already by tagged in the pusher #if defined(MPI_ENABLED) - timers->start("Communications_sendrecv"); + // Defined for debugging + int mpi_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); - std::size_t total_recv_count = 0; - const std::size_t total_send_count = species.npart() - - npart_per_tag_arr[ParticleTag::alive] - - npart_per_tag_arr[ParticleTag::dead]; - for (auto& direction : dir::Directions::all) { + std::vector npart_per_tag_arr_recv(tag_count, 0); + for (auto& direction : dir::Directions::all) { const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; @@ -694,11 +698,11 @@ template recv_rank, nsend, nrecv); - total_recv_count += nrecv; + total_recv += nrecv; 
npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; } - timers->stop("Communications_sendrecv"); - raise::FatalIf((index_last + total_recv_count) >= species.maxnpart(), + + raise::FatalIf((npart + total_recv) >= species.maxnpart(), "Too many particles to receive (cannot fit into maxptl)", HERE); // Now we know the number of particles to be sent and received per direction @@ -712,54 +716,89 @@ template tag=0 ct tag=1 ct tag=3 ct (dead) (alive) (tag1) ... */ - timers->start("PermuteVector"); auto& this_tag = species.tag; auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); - // Current offset is a helper array used to create permute vector - // It stores the number of particles of a given tag type stored during the loop Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::parallel_for( "PermuteVector", species.npart(), Lambda(const std::size_t p) { - auto current_tag = this_tag(p); - auto idx_permute_vec = this_tag_offset(current_tag) + current_offset(current_tag); - Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); + auto current_tag = this_tag(p); + auto i_current_tag_offset = Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); + auto idx_permute_vec = this_tag_offset(current_tag) + i_current_tag_offset; permute_vector(idx_permute_vec) = static_cast(p); }); - timers->stop("PermuteVector"); + + // Check: add the end of the loop, current_offset should be equal to npart_per_tag + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count. 
- Kokkos::View allocation_vector("allocation_vector", total_recv_count); - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; - std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; - std::size_t n_holes = species.npart() - n_alive; + Kokkos::View allocation_vector("allocation_vector", total_recv); + + // TWO BUGS: when nsend = nrecv, an extra dead particle is created out of nowhere + // when nrecv > nsend but < nrecv < nsend + ndead, tags of alive particles are not changed - timers->start("AllocationVector"); Kokkos::parallel_for( "AllocationVector", - total_recv_count, + total_recv, Lambda(const std::size_t p) { // Case: recevied particle count less than dead particle count -> replace dead particles - if (p < n_dead){ + if (p < total_dead){ allocation_vector(p) = permute_vector(p); } // Case: received particle count > dead particle count but < sent particle count -> replace // sent particles - else if (p <= n_holes){ - allocation_vector(p) = permute_vector(n_alive + p); + else if (p < total_holes && p >= total_dead){ + allocation_vector(p) = permute_vector(total_alive + p); } // Case: received particle count exceeds sent + dead particles -> append at the end else { - allocation_vector(p) = static_cast(index_last + (p - n_holes)); + allocation_vector(p) = static_cast(npart + (p - total_holes)); } }); - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - timers->stop("AllocationVector"); + Kokkos::fence(); + + // Compute where the received particles are allocated + if (mpi_rank == 0){ + Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); + Kokkos::parallel_for( + "ParticlesAllocatedPerTag", + total_recv, + Lambda(const std::size_t i) { + auto index = allocation_vector(i); + auto tag = this_tag(index); + Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); + }); + Kokkos::fence(); + auto particles_allocated_per_tag_h = 
Kokkos::create_mirror_view(particles_allocated_per_tag); + Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); + std::cout << "Particles allocated per tag (pre recv): "; + for (std::size_t i = 0; i < tag_count; i++){ + std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; + } + std::cout << std::endl; + } + + + // Check if the particle tags are only dead or alive + //if (mpi_rank == 0){ + // std::cout << "Before COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + //} std::size_t count_recv = 0; std::size_t iteration = 0; // Main loop over all direction where we send the data @@ -821,8 +860,8 @@ template } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + auto range_permute = std::make_pair(static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) auto range_allocate = std::make_pair(static_cast(count_recv), @@ -843,9 +882,85 @@ template count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } - // If receive count is less than send count then make the tags of sent dead ? 
Ask Hayk - species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); -#endif + // Compute where the received particles are allocated + //if (mpi_rank == 0){ + //Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); + //Kokkos::parallel_for( + // "ParticlesAllocatedPerTag", + // total_recv, + // Lambda(const std::size_t i) { + // auto index = allocation_vector(i); + // auto tag = this_tag(index); + // Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); + // }); + //Kokkos::fence(); + //auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); + //Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); + + //std::cout << "Particles allocated per tag (post recv): "; + //for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; + //} + //std::cout << std::endl; + // } + // If receive count is less than send count then make the tags of sent dead + if (total_recv <= total_holes){ + if (total_recv <= total_dead){ + // Case: all sent particles' tags are set to dead + /* (received) + [ | <------------------> | <-------->] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + + auto offset = total_alive + total_dead; + Kokkos::parallel_for( + "CommunicateParticles", + total_send, + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } + else{ + // Case: tags of sent particles that are not replaced by recevied particles are made dead + /* (received) (received) + [ | <------------------> |] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + auto offset = total_alive + total_recv; + Kokkos::parallel_for( + "CommunicateParticles", + total_send - (total_recv - total_dead), + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } + } + + + // Check if the particle tags are only dead 
or alive + species.set_npart(npart + std::max(total_send, total_recv) - total_send); + npart_per_tag_arr = species.npart_per_tag(); + //if (mpi_rank == 0) + //{ + // std::cout << "After COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << + // "Total send: " << total_send << std::endl; + // std::cout << std::endl << "*************"<< std::endl; + //} + #endif } } From 9391c196f5e8c65ef4d43a9e924b948b9ab77adf Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 16 Dec 2024 17:19:36 -0500 Subject: [PATCH 057/124] fmt --- TASKLIST.md | 4 + benchmark/benchmark.cpp | 230 +++++++++------- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/framework/domain/comm_mpi.hpp | 333 +++++++++++++++--------- src/framework/domain/communications.cpp | 322 +++++++++++------------ 7 files changed, 499 insertions(+), 396 deletions(-) diff --git a/TASKLIST.md b/TASKLIST.md index 069a7deb2..c12f60f4c 100644 --- a/TASKLIST.md +++ b/TASKLIST.md @@ -3,3 +3,7 @@ - [ ] removing temporary variables in interpolation - [ ] passing by value vs const ref in metric - [ ] return physical coords one-by-one instead of by passing full vector + +### Things to look into + +1. 
_h fields in mpi communication diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6bfe5c7c7..797c8ed87 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -2,59 +2,65 @@ #include "global.h" #include "utils/error.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" -#include "framework/containers/particles.h" -#include "framework/domain/communications.cpp" + #include "metrics/metric_base.h" #include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + #include -#define TIMER_START(label) \ - Kokkos::fence(); \ - auto start_##label = std::chrono::high_resolution_clock::now(); +#include "framework/domain/communications.cpp" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); -#define TIMER_STOP(label) \ - Kokkos::fence(); \ - auto stop_##label = std::chrono::high_resolution_clock::now(); \ - auto duration_##label = std::chrono::duration_cast(stop_##label - start_##label).count(); \ - std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" << std::endl; +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; /* Test to check the performance of the new particle allocation scheme - - Create a metadomain object main() - - Set npart + initialize tags InitializeParticleArrays() - - 'Push' the particles by randomly updating the tags PushParticles() + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() - Communicate particles to neighbors and time the communication 
- Compute the time taken for best of N iterations for the communication */ // Set npart and set the particle tags to alive template -void InitializeParticleArrays(Domain &domain, const int npart){ - raise::ErrorIf(npart > domain.species[0].maxnpart(), - "Npart cannot be greater than maxnpart", HERE); +void InitializeParticleArrays(Domain& domain, const int npart) { + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); const auto nspecies = domain.species.size(); for (int i_spec = 0; i_spec < nspecies; i_spec++) { domain.species[i_spec].set_npart(npart); domain.species[i_spec].SyncHostDevice(); - auto &this_tag = domain.species[i_spec].tag; + auto& this_tag = domain.species[i_spec].tag; Kokkos::parallel_for( "Initialize particles", npart, - Lambda(const std::size_t i) - { - this_tag(i) = ParticleTag::alive; - } - ); + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); } return; } // Randomly reassign tags to particles for a fraction of particles template -void PushParticles(Domain &domain, const double send_frac, - const int seed_ind, const int seed_tag){ +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); const auto nspecies = domain.species.size(); for (int i_spec = 0; i_spec < nspecies; i_spec++) { @@ -62,26 +68,27 @@ void PushParticles(Domain &domain, const double send_frac, const auto nparticles = domain.species[i_spec].npart(); const auto nparticles_to_send = static_cast(send_frac * nparticles); // Generate random indices to send - Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); - Kokkos::fill_random(indices_to_send, random_pool, 0, nparticles); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); // 
Generate random tags to send - Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); - Kokkos::fill_random(tags_to_send, random_pool_tag, 0, domain.species[i_spec].ntags()); - auto &this_tag = domain.species[i_spec].tag; + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; Kokkos::parallel_for( - "Push particles", - nparticles_to_send, - Lambda(const std::size_t i) - { - auto prtl_to_send = indices_to_send(i); - auto tag_to_send = tags_to_send(i); - this_tag(prtl_to_send) = tag_to_send; - } - ); - domain.species[i_spec].npart_per_tag(); - domain.species[i_spec].SyncHostDevice(); + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); } return; } @@ -90,42 +97,51 @@ auto main(int argc, char* argv[]) -> int { std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); // Create a Metadomain object - const unsigned int ndomains = 1; - const std::vector global_decomposition = {{-1,-1, -1}}; - const std::vector global_ncells = {32, 32, 32}; - const boundaries_t global_extent = {{0.0, 3.0}, {0.0, 3.0}, {0.0, 3.0}}; - const boundaries_t global_flds_bc = { {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}}; - const boundaries_t global_prtl_bc = { {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const unsigned int ndomains = 1; + const std::vector global_decomposition = { + { -1, -1, -1 } + }; + const std::vector global_ncells = { 32, 32, 32 }; + const 
boundaries_t global_extent = { + { 0.0, 3.0 }, + { 0.0, 3.0 }, + { 0.0, 3.0 } + }; + const boundaries_t global_flds_bc = { + { FldsBC::PERIODIC, FldsBC::PERIODIC }, + { FldsBC::PERIODIC, FldsBC::PERIODIC }, + { FldsBC::PERIODIC, FldsBC::PERIODIC } + }; + const boundaries_t global_prtl_bc = { + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } + }; const std::map metric_params = {}; - const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::Particles(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain> - ( ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - {species} - ); + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticlesSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers {{"Communication"}, nullptr, false}; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; InitializeParticleArrays(*local_domain, npart); // Timers for both the communication routines auto total_time_elapsed_old = 0; @@ -133,34 +149,46 @@ auto main(int argc, char* argv[]) -> int { int seed_ind = 0; int seed_tag = 1; + Kokkos::fence(); + for (int i = 0; 
i < 10; ++i) { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast(stop_new - start_new).count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast(stop_old - start_old).count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } } - std::cout << "Total time elapsed for old: " << 
total_time_elapsed_old << " microseconds" << std::endl; - std::cout << "Total time elapsed for new: " << total_time_elapsed_new << " microseconds" << std::endl; + std::cout << "Total time elapsed for old: " << total_time_elapsed_old + << " microseconds" << std::endl; + std::cout << "Total time elapsed for new: " << total_time_elapsed_new + << " microseconds" << std::endl; return 0; } @@ -172,4 +200,4 @@ auto main(int argc, char* argv[]) -> int { other tags to alive CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles and discounts the rest -*/ \ No newline at end of file +*/ diff --git a/extern/Kokkos b/extern/Kokkos index 5fc08a9a7..b6a16bc9d 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 5fc08a9a7da14d8530f8c7035d008ef63ddb4e5c +Subproject commit b6a16bc9d88a9252d76e64fd2be20c58eb5d7f2e diff --git a/extern/adios2 b/extern/adios2 index a6e8314cc..25ccd6aaa 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit a6e8314cc3c0b28d496b44dcd4f15685013b887b +Subproject commit 25ccd6aaa810bbc217b43421f9c43140082c65b9 diff --git a/extern/plog b/extern/plog index 85a871b13..96637a6e5 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit 96637a6e5e53f54e4e56d667d312c564d979ec0e diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2251968c4..9b2ad0a33 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -47,14 +47,12 @@ namespace comm { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - raise::ErrorIf( (send_rank == rank && send_idx != idx) || (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self @@ -332,10 +330,11 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } - 
Kokkos::deep_copy(arr, array_h); + if ((recv_rank >= 0) and (recv_count > 0)) { + Kokkos::deep_copy(arr, array_h); + } } - void ParticleSendRecvCount(int send_rank, int recv_rank, const std::size_t& send_count, @@ -459,154 +458,256 @@ namespace comm { return recv_count; } - template - void CommunicateParticleQuantityBuffer( array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate) { - - array_t buffer( "buffer", indices_to_send.extent(0) + - indices_to_allocate.extent(0)); + void CommunicateParticleQuantityBuffer(array_t& arr, + int send_rank, + int recv_rank, + const range_tuple_t& send_slice, + const range_tuple_t& recv_slice, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate) { + + array_t buffer("buffer", + indices_to_send.extent(0) + indices_to_allocate.extent(0)); // Populate the buffer for particle array Kokkos::parallel_for( - "PopulateBuffer", - indices_to_send.extent(0), - Lambda(const size_t i) { - buffer(i) = arr(indices_to_send(i)); - }); + "PopulateBuffer", + indices_to_send.extent(0), + Lambda(const size_t i) { buffer(i) = arr(indices_to_send(i)); }); CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); // Populate from buffer to the particle array Kokkos::parallel_for( - "PopulateFromBuffer", - indices_to_allocate.extent(0), - Lambda(const size_t i) { - arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); - }); - return; + "PopulateFromBuffer", + indices_to_allocate.extent(0), + Lambda(const size_t i) { + arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); + }); + return; } template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate, - int send_rank, - int recv_rank, - std::vector shifts_in_x){ + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View 
indices_to_send, + Kokkos::View indices_to_allocate, + int send_rank, + int recv_rank, + std::vector shifts_in_x) { if ((send_rank < 0) && (recv_rank < 0)) { raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); } // First set the tags of the sent particles to be dead - auto& this_tag = species.tag; - //Kokkos::parallel_for( + auto& this_tag = species.tag; + // Kokkos::parallel_for( //"SetTagDead", - //Kokkos::RangePolicy(0, indices_to_allocate.size()), - //KOKKOS_LAMBDA(const size_t i) { - // const auto idx = indices_to_send(i); - // this_tag(idx) = static_cast(ParticleTag::dead); - //}); - + // Kokkos::RangePolicy(0, indices_to_allocate.size()), + // KOKKOS_LAMBDA(const size_t i) { + // const auto idx = indices_to_send(i); + // this_tag(idx) = static_cast(ParticleTag::dead); + // }); + // Construct send and receive slice for the buffer auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); - auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + - indices_to_allocate.size() }); + auto recv_slice = range_tuple_t( + { indices_to_send.size(), + indices_to_send.size() + indices_to_allocate.size() }); // Send and receive the particles - CommunicateParticleQuantityBuffer(species.i1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1, + send_rank, + recv_rank, + send_slice, + recv_slice, + 
indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } if constexpr (D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - 
CommunicateParticleQuantityBuffer(species.dx3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } - CommunicateParticleQuantityBuffer(species.ux1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.weight, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux1, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.weight, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantityBuffer(species.phi, send_rank, recv_rank, send_slice, 
recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.phi, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantityBuffer(species.pld[p], send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.pld[p], + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } // Set the tag for the received particles to be alive and perform the necessary displacements - if constexpr (D == Dim::_1D) - { - const auto shift_in_x1 = shifts_in_x[0]; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; + if constexpr (D == Dim::_1D) { + const auto shift_in_x1 = shifts_in_x[0]; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i1_prev(idx) += shift_in_x1; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i1_prev(idx) += shift_in_x1; + }); } - else if constexpr (D == Dim::_2D) - { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; + else if constexpr (D == Dim::_2D) { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev 
= species.i2_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + }); } - else if constexpr (D == Dim::_3D) - { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - const auto shift_in_x3 = shifts_in_x[2]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; + else if constexpr (D == Dim::_3D) { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + const auto shift_in_x3 = shifts_in_x[2]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i3 = species.i3; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + auto& this_i3_prev = species.i3_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i3(idx) += shift_in_x3; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - this_i3_prev(idx) += shift_in_x3; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const 
size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i3(idx) += shift_in_x3; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + this_i3_prev(idx) += shift_in_x3; + }); } Kokkos::fence(); return; - } - + } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index a43b635b7..5e5da4a0c 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -86,8 +86,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,8 +110,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } @@ -129,8 +129,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +196,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -645,26 +645,27 @@ namespace ntt { } } - -/* - New function to communicate particles using a buffer -*/ -template + /* + New function to communicate particles using a buffer + */ + template void Metadomain::CommunicateParticlesBuffer(Domain& domain, - timer::Timers* timers) { + timer::Timers* timers) { raise::ErrorIf(timers == nullptr, "Timers not passed when Comm::Prtl called", HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - auto npart_per_tag_arr = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto 
total_alive = static_cast(npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast(npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); - auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); + auto npart_per_tag_arr = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -676,9 +677,9 @@ template // array that holds the number of particles to be received per tag std::vector npart_per_tag_arr_recv(tag_count, 0); - for (auto& direction : dir::Directions::all) { + for (auto& direction : dir::Directions::all) { const auto [send_params, - recv_params] = GetSendRecvParams(this, domain, direction, true); + recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; @@ -686,78 +687,76 @@ template if (send_rank < 0 and recv_rank < 0) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag_arr[send_dir_tag]; - std::size_t nrecv = 0; + const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); + const auto nsend = npart_per_tag_arr[send_dir_tag]; + std::size_t nrecv = 0; // Get the receive count send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); - 
comm::ParticleSendRecvCount(send_rank, - recv_rank, - nsend, - nrecv); - total_recv += nrecv; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + total_recv += nrecv; npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; } raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); + "Too many particles to receive (cannot fit into maxptl)", + HERE); // Now we know the number of particles to be sent and received per direction /* permute vector contains the indices of the tags to send and receive in the order of the directions E.g., consider the following tag array [ 0, 0, 3, 0, 1,...] Then, permute vector will look something like - [0, 1, 3, ..., 2, ..., 4, ... ] - |<--------- >| |<----->| |<----->| .... - tag=0 ct tag=1 ct tag=3 ct - (dead) (alive) (tag1) ... + [0, 1, 3, ..., 4, ..., ... 2, ... ] + |<--------- >| |<----->| |<----->| .... + tag=0 ct tag=1 ct tag=3 ct + (dead) (alive) (tag1) ... 
*/ - auto& this_tag = species.tag; - auto& this_tag_offset = species.tag_offset; + auto& this_tag = species.tag; + auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); Kokkos::View current_offset("current_offset", species.ntags()); + // @TODO: do not save tag = 1 particles into permute_vector + // instead of species.npart(), size will be species.npart() - npart_per_tag[ParticleTag::alive]; Kokkos::parallel_for( "PermuteVector", species.npart(), - Lambda(const std::size_t p) { - auto current_tag = this_tag(p); - auto i_current_tag_offset = Kokkos::atomic_fetch_add(&current_offset(current_tag), 1); - auto idx_permute_vec = this_tag_offset(current_tag) + i_current_tag_offset; - permute_vector(idx_permute_vec) = static_cast(p); + Lambda(index_t p) { + const auto current_tag = this_tag(p); + const auto idx_permute_vec = this_tag_offset(current_tag) + + Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; }); - // Check: add the end of the loop, current_offset should be equal to npart_per_tag - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } + // Check: add the end of the loop, current_offset should be equal to npart_per_tag + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count.
Kokkos::View allocation_vector("allocation_vector", total_recv); - // TWO BUGS: when nsend = nrecv, an extra dead particle is created out of nowhere - // when nrecv > nsend but < nrecv < nsend + ndead, tags of alive particles are not changed - Kokkos::parallel_for( "AllocationVector", total_recv, - Lambda(const std::size_t p) { - // Case: recevied particle count less than dead particle count -> replace dead particles - if (p < total_dead){ + Lambda(index_t p) { + // Case: received particle count less than dead particle count -> replace dead particles + if (p < total_dead) { allocation_vector(p) = permute_vector(p); } // Case: received particle count > dead particle count but < sent particle count -> replace // sent particles - else if (p < total_holes && p >= total_dead){ + else if (p < total_holes && p >= total_dead) { allocation_vector(p) = permute_vector(total_alive + p); } // Case: received particle count exceeds sent + dead particles -> append at the end @@ -767,52 +766,20 @@ template }); Kokkos::fence(); - // Compute where the received particles are allocated - if (mpi_rank == 0){ - Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); - Kokkos::parallel_for( - "ParticlesAllocatedPerTag", - total_recv, - Lambda(const std::size_t i) { - auto index = allocation_vector(i); - auto tag = this_tag(index); - Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); - }); - Kokkos::fence(); - auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); - Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); - - std::cout << "Particles allocated per tag (pre recv): "; - for (std::size_t i = 0; i < tag_count; i++){ - std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - } - std::cout << std::endl; - } - - - // Check if the particle tags are only dead or alive - //if (mpi_rank == 0){ - // std::cout << "Before COMM: " << std::endl; - // std::cout << "Tag counts: "; - 
// for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - //} std::size_t count_recv = 0; std::size_t iteration = 0; // Main loop over all direction where we send the data for (auto& direction : dir::Directions::all) { // When nowhere to send and receive - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; if (send_rank < 0 and recv_rank < 0) { continue; } // Get the coordinate shifts in xi std::vector shifts_in_x; - auto recv_ind = recv_inds[iteration]; + auto recv_ind = recv_inds[iteration]; if constexpr (D == Dim::_1D) { int shift_in_x1 { 0 }; if ((-direction)[0] == -1) { @@ -821,8 +788,7 @@ template shift_in_x1 = domain.mesh.n_active(in::x1); } shifts_in_x.push_back(shift_in_x1); - } - else if constexpr (D == Dim::_2D) { + } else if constexpr (D == Dim::_2D) { int shift_in_x1 { 0 }, shift_in_x2 { 0 }; if ((-direction)[0] == -1) { shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); @@ -836,8 +802,7 @@ template } shifts_in_x.push_back(shift_in_x1); shifts_in_x.push_back(shift_in_x2); - } - else if constexpr (D == Dim::_3D) { + } else if constexpr (D == Dim::_3D) { int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; if ((-direction)[0] == -1) { shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); @@ -860,32 +825,39 @@ template } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto range_permute = std::make_pair(static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + - npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); + auto range_permute = std::make_pair( + static_cast( + species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast( + 
species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) - auto range_allocate = std::make_pair(static_cast(count_recv), - static_cast(count_recv + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + auto range_allocate = std::make_pair( + static_cast(count_recv), + static_cast( + count_recv + + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + // @TODO: check subview index // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) - auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + auto indices_to_send = Kokkos::subview(permute_vector, range_permute); // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); + auto indices_to_allocate = Kokkos::subview(allocation_vector, + range_allocate); // Main function that sends the particles and receives the arrays - comm::CommunicateParticlesBuffer( species, - indices_to_send, - indices_to_allocate, - send_rank, - recv_rank, - shifts_in_x); + comm::CommunicateParticlesBuffer(species, + indices_to_send, + indices_to_allocate, + send_rank, + recv_rank, + shifts_in_x); count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } // Compute where the received particles are allocated - //if (mpi_rank == 0){ - //Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); - //Kokkos::parallel_for( + // if (mpi_rank == 0){ + // Kokkos::View particles_allocated_per_tag("particles + // allocated per tag", tag_count); Kokkos::parallel_for( // "ParticlesAllocatedPerTag", // total_recv, // Lambda(const std::size_t i) { @@ -893,79 +865,77 @@ template // auto tag = this_tag(index); // 
Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); // }); - //Kokkos::fence(); - //auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); - //Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); - - //std::cout << "Particles allocated per tag (post recv): "; - //for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - //} - //std::cout << std::endl; + // Kokkos::fence(); + // auto particles_allocated_per_tag_h = + // Kokkos::create_mirror_view(particles_allocated_per_tag); + // Kokkos::deep_copy(particles_allocated_per_tag_h, + // particles_allocated_per_tag); + + // std::cout << "Particles allocated per tag (post recv): "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; // } - // If receive count is less than send count then make the tags of sent dead - if (total_recv <= total_holes){ - if (total_recv <= total_dead){ - // Case: all sent particles' tags are set to dead - /* (received) - [ | <------------------> | <-------->] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - - auto offset = total_alive + total_dead; + // std::cout << std::endl; + // } + // If receive count is less than send count then make the tags of sent dead + if (total_recv <= total_holes) { + if (total_recv <= total_dead) { + // Case: all sent particles' tags are set to dead + /* (received) + [ | <------------------> | <-------->] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + + auto offset = total_alive + total_dead; Kokkos::parallel_for( - "CommunicateParticles", - total_send, - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); - } - else{ - // Case: tags of sent particles that are not replaced by recevied particles are made dead - /* (received) (received) - [ | <------------------> |] - (dead) (alive) (sent) - 
|| - (to be made dead) - ^ - (offset) - */ - auto offset = total_alive + total_recv; + "CommunicateParticles", + total_send, + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } else { + // Case: tags of sent particles that are not replaced by recevied particles are made dead + /* (received) (received) + [ | <------------------> |] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + auto offset = total_alive + total_recv; Kokkos::parallel_for( - "CommunicateParticles", - total_send - (total_recv - total_dead), - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); + "CommunicateParticles", + total_send - (total_recv - total_dead), + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); } } - // Check if the particle tags are only dead or alive species.set_npart(npart + std::max(total_send, total_recv) - total_send); npart_per_tag_arr = species.npart_per_tag(); - //if (mpi_rank == 0) + // if (mpi_rank == 0) //{ - // std::cout << "After COMM: " << std::endl; - // std::cout << "Tag counts: "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << - // "Total send: " << total_send << std::endl; - // std::cout << std::endl << "*************"<< std::endl; - //} - #endif + // std::cout << "After COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << + // "Total send: " << total_send << std::endl; + // std::cout << std::endl << "*************"<< std::endl; + // } +#endif } } - - template struct Metadomain>; template struct Metadomain>; template struct 
Metadomain>; From c0d465205889775c30843d7368bd1f5c28a31f25 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 16 Dec 2024 18:29:21 -0500 Subject: [PATCH 058/124] tested prtlsort (WIP) --- benchmark/benchmark.cpp | 186 ++++++++++++------------ extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/framework/containers/particles.h | 1 - src/framework/domain/comm_mpi.hpp | 18 +-- src/framework/domain/communications.cpp | 17 ++- src/framework/domain/metadomain.cpp | 3 + 8 files changed, 120 insertions(+), 111 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 797c8ed87..593b7f190 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -35,6 +35,7 @@ - Communicate particles to neighbors and time the communication - Compute the time taken for best of N iterations for the communication */ +using namespace ntt; // Set npart and set the particle tags to alive template @@ -94,101 +95,106 @@ void PushParticles(Domain& domain, } auto main(int argc, char* argv[]) -> int { - std::cout << "Constructing the domain" << std::endl; - ntt::GlobalInitialize(argc, argv); - // Create a Metadomain object - const unsigned int ndomains = 1; - const std::vector global_decomposition = { - { -1, -1, -1 } - }; - const std::vector global_ncells = { 32, 32, 32 }; - const boundaries_t global_extent = { - { 0.0, 3.0 }, - { 0.0, 3.0 }, - { 0.0, 3.0 } - }; - const boundaries_t global_flds_bc = { - { FldsBC::PERIODIC, FldsBC::PERIODIC }, - { FldsBC::PERIODIC, FldsBC::PERIODIC }, - { FldsBC::PERIODIC, FldsBC::PERIODIC } - }; - const boundaries_t global_prtl_bc = { - { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, - { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, - { PrtlBC::PERIODIC, PrtlBC::PERIODIC } - }; - const std::map metric_params = {}; - const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; - const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::ParticlesSpecies(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain>( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - { species }); + GlobalInitialize(argc, argv); + { + std::cout << "Constructing the domain" << std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers { { "Communication" }, nullptr, false }; - InitializeParticleArrays(*local_domain, npart); - // Timers for both the communication routines - auto total_time_elapsed_old = 0; - auto total_time_elapsed_new = 0; + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; - int seed_ind = 0; - int seed_tag = 1; - Kokkos::fence(); + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); - for (int i = 0; i < 10; ++i) { - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast( - stop_new - start_new) - .count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - } - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - 
Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast( - stop_old - start_old) - .count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); } - std::cout << "Total time elapsed for old: " << total_time_elapsed_old - << " microseconds" << std::endl; - std::cout << "Total time elapsed for new: " << total_time_elapsed_new - << " microseconds" << std::endl; + GlobalFinalize(); return 0; } diff --git a/extern/Kokkos b/extern/Kokkos index b6a16bc9d..eb11070f6 160000 --- a/extern/Kokkos +++ 
b/extern/Kokkos @@ -1 +1 @@ -Subproject commit b6a16bc9d88a9252d76e64fd2be20c58eb5d7f2e +Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 diff --git a/extern/adios2 b/extern/adios2 index 25ccd6aaa..b8761e2af 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit 25ccd6aaa810bbc217b43421f9c43140082c65b9 +Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 diff --git a/extern/plog b/extern/plog index 96637a6e5..85a871b13 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 96637a6e5e53f54e4e56d667d312c564d979ec0e +Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 7496db78c..86443c98f 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -193,7 +193,6 @@ namespace ntt { * @brief Count the number of particles with a specific tag. * @return The vector of counts for each tag. */ - [[nodiscard]] auto npart_per_tag() const -> std::vector; /* setters -------------------------------------------------------------- */ diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 9b2ad0a33..d7d19c983 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -296,18 +296,18 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - auto array_h = Kokkos::create_mirror_view(arr); - Kokkos::deep_copy(array_h, arr); + // auto array_h = Kokkos::create_mirror_view(arr); + // Kokkos::deep_copy(array, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(array_h.data() + send_slice.first, + MPI_Sendrecv(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - 
array_h.data() + recv_slice.first, + arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -315,14 +315,14 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(array_h.data() + send_slice.first, + MPI_Send(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(array_h.data() + recv_slice.first, + MPI_Recv(arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -330,9 +330,9 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } - if ((recv_rank >= 0) and (recv_count > 0)) { - Kokkos::deep_copy(arr, array_h); - } + // if ((recv_rank >= 0) and (recv_count > 0)) { + // Kokkos::deep_copy(arr, array_h); + // } } void ParticleSendRecvCount(int send_rank, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 5e5da4a0c..ff7edfec6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -86,8 +86,8 @@ namespace ntt { } else { // no communication necessary return { - { 0, -1 }, - { 0, -1 } + {0, -1}, + {0, -1} }; } #if defined(MPI_ENABLED) @@ -110,8 +110,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - { send_ind, send_rank }, - { recv_ind, recv_rank } + {send_ind, send_rank}, + {recv_ind, recv_rank} }; } @@ -129,8 +129,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - { { 0, -1 }, {} }, - { { 0, -1 }, {} } + {{ 0, -1 }, {}}, + {{ 0, -1 }, {}} }; } auto send_slice = std::vector {}; @@ -196,8 +196,8 @@ namespace ntt { } return { - { { send_ind, send_rank }, send_slice }, - { { recv_ind, recv_rank }, recv_slice }, + {{ send_ind, send_rank }, send_slice}, + {{ recv_ind, recv_rank }, recv_slice}, }; } @@ -746,6 +746,7 @@ namespace ntt { // to the pth hole in the array, or after npart() if p > sent+dead 
count. Kokkos::View allocation_vector("allocation_vector", total_recv); + // @CRITICAL: this may overwrite unsent data Kokkos::parallel_for( "AllocationVector", total_recv, diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 5e66bc366..ec8561a9a 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -46,6 +46,9 @@ namespace ntt { #if defined(MPI_ENABLED) MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank); + raise::ErrorIf(global_ndomains != g_mpi_size, + "Exactly 1 domain per MPI rank is allowed", + HERE); #endif initialValidityCheck(); From 646a208a5c122ef168a12ac169fab2acd2e5e454 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 17 Dec 2024 20:04:54 -0500 Subject: [PATCH 059/124] removed tag_offset array from the particle class. The npart_per_tag() method now returns a pair of npart_per_tag and tag_offset arrays --- src/framework/containers/particles.cpp | 16 ++++++---------- src/framework/containers/particles.h | 4 ---- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index c97f8da2d..52efdaf6d 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,9 +47,6 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); - tag_offset = array_t { label + "_tag_offset", ntags() }; - tag_offset_h = Kokkos::create_mirror_view(tag_offset); - for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -101,17 +98,16 @@ namespace ntt { auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); Kokkos::deep_copy(npart_tag_host, npart_tag); - std::vector npart_tag_vec; + std::vector npart_tag_vec(ntags()); + std::vector tag_offset(ntags()); for (std::size_t t { 0 }; t < ntags(); ++t) { - 
npart_tag_vec.push_back(npart_tag_host(t)); - tag_offset_h(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; + npart_tag_vec[t] = npart_tag_host(t); + tag_offset[t] = (t > 0) ? npart_tag_vec[t - 1] : 0; } for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset_h(t) += (t > 0) ? tag_offset_h(t - 1) : 0; + tag_offset[t] += (t > 0) ? tag_offset[t - 1] : 0; } - // Copy to device - Kokkos::deep_copy(tag_offset, tag_offset_h); - return npart_tag_vec; + return std::make_pair(npart_tag_vec, tag_offset); } template diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 86443c98f..e4d78cd0d 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -60,8 +60,6 @@ namespace ntt { array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles array_t tag; - // Array to store the cumulative number of particles per tag - array_t tag_offset; // Array to store the particle load std::vector> pld; // phi coordinate (for axisymmetry) @@ -74,7 +72,6 @@ namespace ntt { array_mirror_t weight_h; array_mirror_t phi_h; array_mirror_t tag_h; - array_mirror_t tag_offset_h; std::vector> pld_h; // for empty allocation @@ -181,7 +178,6 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - footprint += sizeof(int) * tag_offset.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } From 708115c4408d276e1e67f8afd291f793f7a6a831 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 17 Dec 2024 20:47:52 -0500 Subject: [PATCH 060/124] changed functions that called npart_per_tag() --- src/framework/containers/particles.cpp | 18 ++++++++------ src/framework/containers/particles.h | 2 +- src/framework/domain/communications.cpp | 31 ++++++++++++++----------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/framework/containers/particles.cpp 
b/src/framework/containers/particles.cpp index 52efdaf6d..fe2346132 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -78,7 +78,8 @@ namespace ntt { } template - auto Particles::npart_per_tag() const -> std::vector { + auto Particles::npart_per_tag() const -> std::pair, + array_t>{ auto this_tag = tag; array_t npart_tag("npart_tags", ntags()); @@ -97,23 +98,25 @@ namespace ntt { auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); Kokkos::deep_copy(npart_tag_host, npart_tag); + array_t tag_offset("tag_offset", ntags()); + auto tag_offset_host = Kokkos::create_mirror_view(tag_offset); std::vector npart_tag_vec(ntags()); - std::vector tag_offset(ntags()); for (std::size_t t { 0 }; t < ntags(); ++t) { - npart_tag_vec[t] = npart_tag_host(t); - tag_offset[t] = (t > 0) ? npart_tag_vec[t - 1] : 0; + npart_tag_vec[t] = npart_tag_host(t); + tag_offset_host(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; } for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset[t] += (t > 0) ? tag_offset[t - 1] : 0; + tag_offset_host(t) += (t > 0) ? 
tag_offset_host(t - 1) : 0; } + Kokkos::deep_copy(tag_offset, tag_offset_host); return std::make_pair(npart_tag_vec, tag_offset); } template auto Particles::SortByTags() -> std::vector { if (npart() == 0 || is_sorted()) { - return npart_per_tag(); + return npart_per_tag().first; } using KeyType = array_t; using BinOp = sort::BinTag; @@ -156,7 +159,8 @@ namespace ntt { Sorter.sort(Kokkos::subview(phi, slice)); } - const auto np_per_tag = npart_per_tag(); + auto np_per_tag_tag_offset = npart_per_tag(); + const auto np_per_tag = np_per_tag_tag_offset.first; set_npart(np_per_tag[(short)(ParticleTag::alive)]); m_is_sorted = true; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index e4d78cd0d..ea692bdd9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -189,7 +189,7 @@ namespace ntt { * @brief Count the number of particles with a specific tag. * @return The vector of counts for each tag. */ - auto npart_per_tag() const -> std::vector; + auto npart_per_tag() const -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index ff7edfec6..36f7a1858 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -656,16 +656,18 @@ namespace ntt { HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - auto npart_per_tag_arr = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); - auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); + // TO DO: 
npart per tag must return npart_per_tag_arr and the cumsum array + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -715,7 +717,6 @@ namespace ntt { (dead) (alive) (tag1) ... */ auto& this_tag = species.tag; - auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); Kokkos::View current_offset("current_offset", species.ntags()); // @TODO: do not save tag = 1 particles into permute_vector @@ -726,7 +727,7 @@ namespace ntt { species.npart(), Lambda(index_t p) { const auto current_tag = this_tag(p); - const auto idx_permute_vec = this_tag_offset(current_tag) + + const auto idx_permute_vec = tag_offset(current_tag) + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -826,11 +827,13 @@ namespace ntt { } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + Kokkos::deep_copy(tag_offset_h, tag_offset); auto range_permute = std::make_pair( static_cast( - species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), static_cast( - species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + + tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) auto range_allocate = std::make_pair( 
From d6a325b3251e414331f89d85eba0ad061fb07bfe Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 19 Dec 2024 15:14:43 -0500 Subject: [PATCH 061/124] changed comms to dispatch arrays of same type in one buffer --- src/framework/domain/comm_mpi.hpp | 619 +++++++++++++++--------- src/framework/domain/communications.cpp | 433 ++++++++--------- 2 files changed, 593 insertions(+), 459 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index d7d19c983..66ea17d23 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -14,6 +14,7 @@ #include "enums.h" #include "global.h" +#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" #include "utils/error.h" @@ -296,8 +297,8 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - // auto array_h = Kokkos::create_mirror_view(arr); - // Kokkos::deep_copy(array, arr); + //auto arr_h = Kokkos::create_mirror_view(arr); + //Kokkos::deep_copy(arr_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and @@ -331,7 +332,7 @@ namespace comm { MPI_STATUS_IGNORE); } // if ((recv_rank >= 0) and (recv_count > 0)) { - // Kokkos::deep_copy(arr, array_h); + // Kokkos::deep_copy(arr, arr_h); // } } @@ -458,256 +459,398 @@ namespace comm { return recv_count; } - template - void CommunicateParticleQuantityBuffer(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate) { - - array_t buffer("buffer", - indices_to_send.extent(0) + indices_to_allocate.extent(0)); - // Populate the buffer for particle array - Kokkos::parallel_for( - "PopulateBuffer", - indices_to_send.extent(0), - Lambda(const size_t i) { 
buffer(i) = arr(indices_to_send(i)); }); - CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); - // Populate from buffer to the particle array - Kokkos::parallel_for( - "PopulateFromBuffer", - indices_to_allocate.extent(0), - Lambda(const size_t i) { - arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); - }); - return; - } template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate, - int send_rank, - int recv_rank, - std::vector shifts_in_x) { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); - } - // First set the tags of the sent particles to be dead - auto& this_tag = species.tag; - // Kokkos::parallel_for( - //"SetTagDead", - // Kokkos::RangePolicy(0, indices_to_allocate.size()), - // KOKKOS_LAMBDA(const size_t i) { - // const auto idx = indices_to_send(i); - // this_tag(idx) = static_cast(ParticleTag::dead); - // }); - - // Construct send and receive slice for the buffer - auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); - auto recv_slice = range_tuple_t( - { indices_to_send.size(), - indices_to_send.size() + indices_to_allocate.size() }); - // Send and receive the particles - CommunicateParticleQuantityBuffer(species.i1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i2, - send_rank, - 
recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View permute_vector, + Kokkos::View allocation_vector, + Kokkos::View tag_offset, + std::vector npart_per_tag_arr, + std::vector npart_per_tag_arr_recv, + std::vector send_ranks, + std::vector recv_ranks) { + // Pointers to the particle data arrays + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_i1 = species.i1; + auto &this_i1_prev = species.i1_prev; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_tag = species.tag; + + // Number of arrays of each type to send/recv + auto NREALS = 4; + auto NINTS = 2; + auto NFLOATS = 2; + if constexpr (D == Dim::_2D) { + this_i2 = species.i2; + this_i2_prev = species.i2_prev; + this_dx2 = species.dx2; + this_dx2_prev = species.dx2_prev; + if (C != Coord::Cart) { + NREALS = 5; + NINTS = 4; + NFLOATS = 4; + this_phi = species.phi; + } else { + NREALS = 4; + NINTS = 4; + NFLOATS = 4; + } } if constexpr (D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i3, - send_rank, - recv_rank, - 
send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + this_i2 = species.i2; + this_i2_prev = species.i2_prev; + this_dx2 = species.dx2; + this_dx2_prev = species.dx2_prev; + this_i3 = species.i3; + this_i3_prev = species.i3_prev; + this_dx3 = species.dx3; + this_dx3_prev = species.dx3_prev; + NREALS = 4; + NINTS = 6; + NFLOATS = 6; } - CommunicateParticleQuantityBuffer(species.ux1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux2, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux3, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantityBuffer(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantityBuffer(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + + // Now make buffers to store recevied data (don't need global send buffers) + const auto total_send = permute_vector.extent(0) - npart_per_tag_arr[ParticleTag::dead]; + const auto total_recv = 
allocation_vector.extent(0); + const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + + /* + Brief on recv buffers: Each recv buffer contains all the received arrays of + a given type. The different physical quantities are stored next to each other + to avoid cache misses. The array is structured as follows: + E.g., + recv_buffer_int: | qty1 | qty2 | ... | qtyNINTS | qty1 | qty2 | ... | qtyNINTS | ... + <-------particle to recv1------> <-------particle to recv2--------> + <----------------------------------total_recv----------------------------> + */ + Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); + Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); + Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); + auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); + auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); + auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); + + + auto iteration = 0; + auto current_received = 0; + for (auto& direction : dir::Directions::all) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto send_count = npart_per_tag_arr[tag_send]; + const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); + Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); + Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); + auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); + auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); + auto send_buffer_prtldx_h = 
Kokkos::create_mirror_view(send_buffer_prtldx); + + // Need different constexpr parallel fors for different dims + if constexpr(D == Dim::_1D) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + }); + } + if constexpr(D == Dim::_2D && C == Coord::Cart) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + }); + } + if constexpr(D == Dim::_2D && C != Coord::Cart) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + 
send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_real(NREALS * p + 4) = this_phi(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + }); + } + if constexpr(D == Dim::_3D) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_int(NINTS * p + 4) = this_i3(idx); + send_buffer_int(NINTS * p + 5) = this_i3_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); + send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + }); + } + + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + Kokkos::deep_copy(tag_offset_h, tag_offset); + /* + Brief on receive offset: + The receive buffer looks like this + <-----------------------------------> + |NINT|NINT|NINT|NINT|NINT|NINT|NINT|NINT|...xnrecv + <--------><--------><--------><--------> + recv1 recv2 recv3 recv4 + |________| + ^ ^ + offset 
offset + nrecv + */ + const auto receive_offset_int = current_received * NINTS; + const auto receive_offset_real = current_received * NREALS; + const auto receive_offset_prtldx = current_received * NFLOATS; + // Comms + // Make host arrays for send and recv buffers + Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); + Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); + Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); + + if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and + (recv_count > 0)) { + MPI_Sendrecv(send_buffer_int_h.data(), + send_count * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_int_h.data() + receive_offset_int, + recv_count*NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_real_h.data(), + send_count * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_real_h.data() + receive_offset_real, + recv_count*NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_prtldx_h.data(), + send_count * NFLOATS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_count*NFLOATS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else if ((send_rank >= 0) and (send_count > 0)) { + MPI_Send(send_buffer_int_h.data(), + send_count * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buffer_real_h.data(), + send_count * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buffer_prtldx_h.data(), + send_count * NFLOATS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } else if ((recv_rank >= 0) and (recv_count > 0)) { + MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, + recv_count * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_real_h.data() + receive_offset_real, + recv_count * NREALS, + 
mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_count * NFLOATS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } - // Set the tag for the received particles to be alive and perform the necessary displacements - if constexpr (D == Dim::_1D) { - const auto shift_in_x1 = shifts_in_x[0]; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; + current_received += recv_count; + iteration++; + } // end over direction loop + Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); + Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); + Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h); + if constexpr (D == Dim::_1D) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i1_prev(idx) += shift_in_x1; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + }); } - else if constexpr (D == Dim::_2D) { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; + if constexpr (D == Dim::_2D && C == Coord::Cart) + { + Kokkos::parallel_for( + 
"PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + }); + } + + if constexpr (D == Dim::_2D && C == Coord::Cart) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_phi(idx) = recv_buffer_real(NREALS * p + 4); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 
2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + }); } - else if constexpr (D == Dim::_3D) { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - const auto shift_in_x3 = shifts_in_x[2]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; + if constexpr (D == Dim::_3D) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i3(idx) += shift_in_x3; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - this_i3_prev(idx) += shift_in_x3; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_i3(idx) = recv_buffer_int(NINTS * p + 4); + this_i3_prev(idx) = recv_buffer_int(NINTS * p + 5); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + this_dx3(idx) = recv_buffer_prtldx(NFLOATS * p + 4); + this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); + }); } - Kokkos::fence(); + species.set_npart(species.npart() + 
std::max(total_send, total_recv) - total_send); return; - } +} } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 36f7a1858..cdc9e5b5a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -656,7 +656,15 @@ namespace ntt { HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - // TO DO: npart per tag must return npart_per_tag_arr and the cumsum array + /* + Brief on arrays + npart_per_tag_arr (vector): | dead count| alive count | tag=1 count | tag=2 count | ... + <--------------------------size = ntags()--------------------------> + tag_offset (Kokkos::View): | 0 | dead count | dead + alive count | dead + alive + tag=1 count | ... + <--------------------------size = ntags()--------------------------> + npart_per_tag_arr_recv (vector): | 0 | 0 | nrecv1 | nrecv2 | ... + <--------------------------size = ntags()--------------------------> + */ auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); auto npart = static_cast(species.npart()); @@ -665,21 +673,24 @@ namespace ntt { auto total_dead = static_cast( npart_per_tag_arr[ParticleTag::dead]); auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; // at this point particles should already by tagged in the pusher #if defined(MPI_ENABLED) - // Defined for debugging - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(tag_count, 0); + std::vector npart_per_tag_arr_recv(species.ntags(), 0); + Kokkos::View shifts_in_x1("shifts_in_x1", species.ntags()); + Kokkos::View shifts_in_x2("shifts_in_x2", species.ntags()); 
+ Kokkos::View shifts_in_x3("shifts_in_x3", species.ntags()); + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + + // Get receive counts + displacements for (auto& direction : dir::Directions::all) { + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; @@ -689,253 +700,233 @@ namespace ntt { if (send_rank < 0 and recv_rank < 0) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag_arr[send_dir_tag]; + const auto nsend = npart_per_tag_arr[tag_send]; std::size_t nrecv = 0; - // Get the receive count + send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); total_recv += nrecv; - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; - } - - raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - // Now we know the number of particles to be sent and received per direction - /* permute vector contains the indices of the tags to send and receive - in the order of the directions - E.g., consider the following tag array - [ 0, 0, 3, 0, 1,...] - Then, permute vector will look something like - [0, 1, 3, ..., 4, ..., ... 2, ... ] - |<--------- >| |<----->| |<----->| .... - tag=0 ct tag=1 ct tag=3 ct - (dead) (alive) (tag1) ... 
- */ - auto& this_tag = species.tag; - Kokkos::View permute_vector("permute_vector", species.npart()); - Kokkos::View current_offset("current_offset", species.ntags()); - // @TODO: do not save tag = 1 particles into permute_vector - // instead of species.npart(), size will be species.npart() - npart_per_tag[ParticleTag::alive]; - - Kokkos::parallel_for( - "PermuteVector", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - const auto idx_permute_vec = tag_offset(current_tag) + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - }); - - // Check: add the end of the loop, current_offset should be equal to npart_per_tag - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } - - // allocation_vector(p) assigns the pth received particle - // to the pth hole in the array, or after npart() if p > sent+dead count. 
- Kokkos::View allocation_vector("allocation_vector", total_recv); - - // @CRITICAL: this may overwrite unsent data - Kokkos::parallel_for( - "AllocationVector", - total_recv, - Lambda(index_t p) { - // Case: received particle count less than dead particle count -> replace dead particles - if (p < total_dead) { - allocation_vector(p) = permute_vector(p); - } - // Case: received particle count > dead particle count but < sent particle count -> replace - // sent particles - else if (p < total_holes && p >= total_dead) { - allocation_vector(p) = permute_vector(total_alive + p); - } - // Case: received particle count exceeds sent + dead particles -> append at the end - else { - allocation_vector(p) = static_cast(npart + (p - total_holes)); - } - }); - Kokkos::fence(); + npart_per_tag_arr_recv[tag_recv] = nrecv; - std::size_t count_recv = 0; - std::size_t iteration = 0; - // Main loop over all direction where we send the data - for (auto& direction : dir::Directions::all) { - // When nowhere to send and receive - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; - - if (send_rank < 0 and recv_rank < 0) { - continue; - } - // Get the coordinate shifts in xi - std::vector shifts_in_x; - auto recv_ind = recv_inds[iteration]; + // @CRITICAL: Ask Hayk if the displacements are correctly set before sending if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); } - shifts_in_x.push_back(shift_in_x1); } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if 
((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; + shifts_in_x1_h(tag_send) = domain.mesh.n_active()[0]; } if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); } - shifts_in_x.push_back(shift_in_x1); - shifts_in_x.push_back(shift_in_x2); } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); } if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); } if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x3); } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = domain.mesh.n_active(in::x3); } - shifts_in_x.push_back(shift_in_x1); - shifts_in_x.push_back(shift_in_x2); - shifts_in_x.push_back(shift_in_x3); } + } // end directions loop + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); - // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - 
Kokkos::deep_copy(tag_offset_h, tag_offset); - auto range_permute = std::make_pair( - static_cast( - tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast( - tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + - npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); - // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) - auto range_allocate = std::make_pair( - static_cast(count_recv), - static_cast( - count_recv + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); - // @TODO: check subview index - // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) - auto indices_to_send = Kokkos::subview(permute_vector, range_permute); - // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = Kokkos::subview(allocation_vector, - range_allocate); - - // Main function that sends the particles and receives the arrays - comm::CommunicateParticlesBuffer(species, - indices_to_send, - indices_to_allocate, - send_rank, - recv_rank, - shifts_in_x); - count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; - iteration++; + raise::FatalIf((npart + total_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + + auto& this_tag = species.tag; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + + /* + Brief on permute vector: It contains the sorted indices of tag != alive particles + E.g., consider the following tag array + species.tag = [ 0, 0, 3, 0, 1, 2,...] + Then, permute vector will look something like + permute_vector = [0, 1, 3, ..., 4, ..., ... 5, ... ] + |<--------- >| |<----->| |<----->| .... 
+ tag=0 ct tag=1 ct tag=2 ct + */ + Kokkos::View permute_vector("permute_vector", total_holes); + Kokkos::View current_offset("current_offset", species.ntags()); + auto &this_tag_offset = tag_offset; + + auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + + if constexpr (D == Dim::_1D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + } + } + }); } - // Compute where the received particles are allocated - // if (mpi_rank == 0){ - // Kokkos::View particles_allocated_per_tag("particles - // allocated per tag", tag_count); Kokkos::parallel_for( - // "ParticlesAllocatedPerTag", - // total_recv, - // Lambda(const std::size_t i) { - // auto index = allocation_vector(i); - // auto tag = this_tag(index); - // Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); - // }); - // Kokkos::fence(); - // auto particles_allocated_per_tag_h = - // Kokkos::create_mirror_view(particles_allocated_per_tag); - // Kokkos::deep_copy(particles_allocated_per_tag_h, - // particles_allocated_per_tag); - - // std::cout << "Particles allocated per tag (post recv): "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - // } - // std::cout << std::endl; - // } - // If receive count is less than send count then make the tags of sent dead - if (total_recv <= total_holes) { - if (total_recv <= total_dead) { - // Case: all sent 
particles' tags are set to dead - /* (received) - [ | <------------------> | <-------->] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - - auto offset = total_alive + total_dead; - Kokkos::parallel_for( - "CommunicateParticles", - total_send, - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); - } else { - // Case: tags of sent particles that are not replaced by recevied particles are made dead - /* (received) (received) - [ | <------------------> |] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - auto offset = total_alive + total_recv; - Kokkos::parallel_for( - "CommunicateParticles", - total_send - (total_recv - total_dead), - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); + + if constexpr (D == Dim::_2D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + } + } + }); + } + + if constexpr (D == Dim::_3D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead 
and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + this_i3(p) += shifts_in_x3(current_tag); + this_i3_prev(p) += shifts_in_x3(current_tag); + } + } + }); + } + + // Sanity check: npart_per_tag must be equal to the current offset except tag=alive + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < species.ntags(); ++i) { + if (i != ParticleTag::alive){ + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } + else{ + raise::FatalIf(current_offset_h(i) != 0, + "Error in permute vector construction", + HERE); } } - // Check if the particle tags are only dead or alive - species.set_npart(npart + std::max(total_send, total_recv) - total_send); - npart_per_tag_arr = species.npart_per_tag(); - // if (mpi_rank == 0) - //{ - // std::cout << "After COMM: " << std::endl; - // std::cout << "Tag counts: "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << - // "Total send: " << total_send << std::endl; - // std::cout << std::endl << "*************"<< std::endl; - // } + /* + Brief on allocation vector: It contains the indices of holes that are filled + by the particles received from other domains + case 1: total_recv > nholes + allocation_vector = | i1 | i2 | i3 | .... | npart | npart + 1 | ... 
+ <-------total_holes------> <---total_recv - nholes--> + (same as permuute vector) (extra particles appended at end) + case 2: total_recv <= nholes + allocation_vector = | i1 | i2 | i3 | .... + <----total_recv-----> + (same as permuute vector) + */ + Kokkos::View allocation_vector("allocation_vector", total_recv); + if (total_recv > total_holes) + { + // Fill the first bit with the permute vector; these are the holes to be filled + Kokkos::parallel_for( + "AllocationVector", + total_holes, + Lambda(index_t p) { + allocation_vector(p) = permute_vector(p); + }); + + // Now allocate the rest to the end of the array + Kokkos::parallel_for( + "AllocationVector", + total_recv - total_holes, + Lambda(index_t p) { + allocation_vector(total_holes + p) = static_cast(npart + p); + }); + } + else + { Kokkos::parallel_for( + "AllocationVector", + total_recv, + Lambda(index_t p) { + allocation_vector(p) = permute_vector(p); + }); + } + // Communicate the arrays + comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, + this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, + send_ranks, recv_ranks); #endif } } From 89a109dcfe61fcb30c478857d40a55791dee6476 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 29 Dec 2024 11:24:23 -0500 Subject: [PATCH 062/124] test for cuda mpi --- benchmark/benchmark.cpp | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 593b7f190..54fc17cf9 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -13,6 +13,8 @@ #include #include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" #define TIMER_START(label) \ Kokkos::fence(); \ @@ -97,6 +99,68 @@ void PushParticles(Domain& domain, auto main(int argc, char* argv[]) -> int { GlobalInitialize(argc, argv); { + /* + MPI checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + 
printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + std::cout << "Constructing the domain" << std::endl; // Create a Metadomain object const unsigned int ndomains = 2; From 
43924f5fbbcbc14092e7966ff0c4245ba04e71aa Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 30 Dec 2024 22:33:47 -0500 Subject: [PATCH 063/124] fixed displacements --- src/engines/srpic.hpp | 1 + src/framework/domain/comm_mpi.hpp | 67 +++++++++++++++++-------- src/framework/domain/communications.cpp | 36 +++++-------- 3 files changed, 60 insertions(+), 44 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 78c8f371e..4772b975a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -1008,6 +1008,7 @@ namespace ntt { Kokkos::Experimental::contribute(domain.fields.bckp, scatter_bckp); m_metadomain.SynchronizeFields(domain, Comm::Bckp, { 0, 1 }); } + logger::Checkpoint("Atmosphere particles injected\n", HERE); if (dim == in::x1) { if (sign > 0) { diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 66ea17d23..0e4817571 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -297,43 +297,70 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - //auto arr_h = Kokkos::create_mirror_view(arr); - //Kokkos::deep_copy(arr_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; + // Make arrays on the host + auto send_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, send_slice)); + Kokkos::deep_copy(send_arr_h, Kokkos::subview(arr, send_slice)); + auto recv_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, recv_slice)); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, + MPI_Sendrecv(send_arr_h.data(), send_count, mpi::get_type(), send_rank, 0, - arr.data() + recv_slice.first, + recv_arr_h.data(), recv_count, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + //MPI_Sendrecv(arr.data() + send_slice.first, + // 
send_count, + // mpi::get_type(), + // send_rank, + // 0, + // arr.data() + recv_slice.first, + // recv_count, + // mpi::get_type(), + // recv_rank, + // 0, + // MPI_COMM_WORLD, + // MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); + MPI_Send( send_arr_h.data(), + send_count, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + //MPI_Send(arr.data() + send_slice.first, + // send_count, + // mpi::get_type(), + // send_rank, + // 0, + // MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); + MPI_Recv( recv_arr_h.data(), + recv_count, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + //MPI_Recv(arr.data() + recv_slice.first, + // recv_count, + // mpi::get_type(), + // recv_rank, + // 0, + // MPI_COMM_WORLD, + // MPI_STATUS_IGNORE); + } + if ((recv_rank >= 0) and (recv_count > 0)) { + Kokkos::deep_copy(Kokkos::subview(arr, recv_slice), recv_arr_h); } - // if ((recv_rank >= 0) and (recv_count > 0)) { - // Kokkos::deep_copy(arr, arr_h); - // } } void ParticleSendRecvCount(int send_rank, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index cdc9e5b5a..428db1ea6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -712,38 +712,26 @@ namespace ntt { npart_per_tag_arr_recv[tag_recv] = nrecv; // @CRITICAL: Ask Hayk if the displacements are correctly set before sending - if constexpr (D == Dim::_1D) { + // direction must be defined + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x1); } 
else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); - } - } else if constexpr (D == Dim::_2D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); - } - } else if constexpr (D == Dim::_3D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -domain.mesh.n_active(in::x1); } + } + if constexpr (D == Dim::_2D || D == Dim::_3D) { if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -domain.mesh.n_active(in::x2); } + } + if constexpr (D == Dim::_3D) { if ((-direction)[2] == -1) { - shifts_in_x3_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x3); } else if ((-direction)[2] == 1) { - shifts_in_x3_h(tag_send) = domain.mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = -domain.mesh.n_active(in::x3); } } } // end directions loop From 8571797268f689e572ea290df480fe6c49a73707 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 30 Dec 2024 22:34:09 -0500 Subject: [PATCH 064/124] changed mpi init call --- src/global/global.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index ec22fd2f3..434740446 100644 --- 
a/src/global/global.cpp +++ b/src/global/global.cpp @@ -9,7 +9,17 @@ void ntt::GlobalInitialize(int argc, char* argv[]) { Kokkos::initialize(argc, argv); #if defined(MPI_ENABLED) - MPI_Init(&argc, &argv); + int required = MPI_THREAD_MULTIPLE; + int provided; + MPI_Init_thread(&argc, + &argv, + required, + &provided); + if (provided != required) { + std::cerr << "MPI_Init_thread() did not provide the requested threading support." << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + //MPI_Init(&argc, &argv); #endif // MPI_ENABLED } From ea99f3bc86cdfeebc871f14eab00d30dca4fa096 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 1 Jan 2025 13:54:18 -0500 Subject: [PATCH 065/124] fixed displacements --- src/engines/srpic.hpp | 4 ++-- src/entity.cpp | 2 +- src/framework/domain/comm_mpi.hpp | 7 ++++++- src/framework/domain/communications.cpp | 26 +++++++++++++------------ src/framework/domain/output.cpp | 2 +- src/global/utils/progressbar.cpp | 4 ++-- src/kernels/particle_moments.hpp | 1 - 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 4772b975a..6e0d9634e 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -102,6 +102,7 @@ namespace ntt { timers.start("FieldBoundaries"); FieldBoundaries(dom, BC::B); timers.stop("FieldBoundaries"); + Kokkos::fence(); } { @@ -127,7 +128,7 @@ namespace ntt { timers.start("Communications"); if ((sort_interval > 0) and (step % sort_interval == 0)) { - m_metadomain.CommunicateParticles(dom, &timers); + m_metadomain.CommunicateParticlesBuffer(dom, &timers); } timers.stop("Communications"); } @@ -1008,7 +1009,6 @@ namespace ntt { Kokkos::Experimental::contribute(domain.fields.bckp, scatter_bckp); m_metadomain.SynchronizeFields(domain, Comm::Bckp, { 0, 1 }); } - logger::Checkpoint("Atmosphere particles injected\n", HERE); if (dim == in::x1) { if (sign > 0) { diff --git a/src/entity.cpp b/src/entity.cpp index 272635d68..79b2f1335 100644 --- a/src/entity.cpp 
+++ b/src/entity.cpp @@ -114,4 +114,4 @@ auto main(int argc, char* argv[]) -> int { } return 0; -} +} \ No newline at end of file diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 0e4817571..c8e7de3a7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -607,6 +607,7 @@ namespace comm { send_buffer_real(NREALS * p + 3) = this_weight(idx); send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_2D && C == Coord::Cart) { @@ -627,6 +628,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_2D && C != Coord::Cart) { @@ -648,6 +650,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_3D) { @@ -672,6 +675,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } @@ -825,7 +829,7 @@ namespace comm { }); } - if constexpr (D == Dim::_2D && C == Coord::Cart) + if constexpr (D == Dim::_2D && C != Coord::Cart) { Kokkos::parallel_for( "PopulateFromRecvBuffer", @@ -875,6 +879,7 @@ namespace comm { this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); }); } + species.set_npart(species.npart() + std::max(total_send, total_recv) - total_send); return; } diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 428db1ea6..80414bd93 100644 --- 
a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -787,8 +787,8 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); } } }); @@ -815,10 +815,10 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i2(p) -= shifts_in_x2(current_tag); + this_i2_prev(p) -= shifts_in_x2(current_tag); } } }); @@ -845,17 +845,19 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); - this_i3(p) += shifts_in_x3(current_tag); - this_i3_prev(p) += shifts_in_x3(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i2(p) -= shifts_in_x2(current_tag); + this_i2_prev(p) -= shifts_in_x2(current_tag); + this_i3(p) -= shifts_in_x3(current_tag); + this_i3_prev(p) -= shifts_in_x3(current_tag); } } }); } + + // Sanity check: npart_per_tag must be equal to the current offset except tag=alive auto current_offset_h = Kokkos::create_mirror_view(current_offset); Kokkos::deep_copy(current_offset_h, current_offset); diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index c7cb6bb65..4a6b2c908 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -107,6 +107,7 @@ namespace ntt { } } + template void ComputeMoments(const 
SimulationParams& params, const Mesh& mesh, @@ -271,7 +272,6 @@ namespace ntt { }); g_writer.writeMesh(dim, xc, xe); } - const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once for (auto& fld : g_writer.fieldWriters()) { diff --git a/src/global/utils/progressbar.cpp b/src/global/utils/progressbar.cpp index 74f952382..38f65a790 100644 --- a/src/global/utils/progressbar.cpp +++ b/src/global/utils/progressbar.cpp @@ -52,10 +52,10 @@ namespace pbar { } auto to_human_readable(long double t, const std::string& u) -> std::string { - const auto [tt, tu] = normalize_duration_fmt(t, u); + const auto [tt, tu] = std::pair{t, u};//normalize_duration_fmt(t, u); const auto t1 = static_cast(tt); const auto t2 = tt - static_cast(t1); - const auto [tt2, tu2] = normalize_duration_fmt(t2, tu); + const auto [tt2, tu2] = std::pair{t2, tu};//normalize_duration_fmt(t2, tu); return fmt::format("%d%s %d%s", t1, tu.c_str(), static_cast(tt2), tu2.c_str()); } diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 8b668a036..0621646ad 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -223,7 +223,6 @@ namespace kernel { } coeff *= weight(p) * smooth; } - auto buff_access = Buff.access(); if constexpr (D == Dim::_1D) { for (auto di1 { -window }; di1 <= window; ++di1) { From 73377c2cba083a25d3e449b7632402d24d2b8a32 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sat, 4 Jan 2025 10:33:22 -0500 Subject: [PATCH 066/124] fixed permute vector construction --- src/engines/srpic.hpp | 5 + src/framework/containers/particles.cpp | 3 + src/framework/containers/particles.h | 4 + src/framework/domain/comm_mpi.hpp | 178 +++++++++++++++++++++--- src/framework/domain/communications.cpp | 84 ++++++++--- src/framework/domain/metadomain.cpp | 27 ++++ src/framework/domain/metadomain.h | 1 + 7 files changed, 263 insertions(+), 39 deletions(-) diff --git 
a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 6e0d9634e..d751c712a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -126,6 +126,11 @@ namespace ntt { timers.stop("CurrentFiltering"); } + // Tags are assigned by now + if (step == 0){ + m_metadomain.SetParticleIDs(dom); + } + timers.start("Communications"); if ((sort_interval > 0) and (step % sort_interval == 0)) { m_metadomain.CommunicateParticlesBuffer(dom, &timers); diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index fe2346132..1cb63bf43 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,6 +47,9 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); + particleID = array_t {label + "_particleID", maxnpart}; + particleID_h = Kokkos::create_mirror_view(particleID); + for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index ea692bdd9..131ff45c0 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -64,6 +64,8 @@ namespace ntt { std::vector> pld; // phi coordinate (for axisymmetry) array_t phi; + // Array to store the particle ids + array_t particleID; // host mirrors array_mirror_t i1_h, i2_h, i3_h; @@ -73,6 +75,7 @@ namespace ntt { array_mirror_t phi_h; array_mirror_t tag_h; std::vector> pld_h; + array_mirror_t particleID_h; // for empty allocation Particles() {} @@ -178,6 +181,7 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); + footprint += sizeof(long) * particleID.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } diff --git a/src/framework/domain/comm_mpi.hpp 
b/src/framework/domain/comm_mpi.hpp index c8e7de3a7..82308e107 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -186,8 +186,6 @@ namespace comm { auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - // Segfault here: print mpi params - // Create host views MPI_Sendrecv(send_fld_h.data(), nsend, mpi::get_type(), @@ -515,16 +513,14 @@ namespace comm { auto &this_dx2_prev = species.dx2_prev; auto &this_dx3_prev = species.dx3_prev; auto &this_tag = species.tag; + auto &this_particleID = species.particleID; // Number of arrays of each type to send/recv auto NREALS = 4; auto NINTS = 2; auto NFLOATS = 2; + auto NLONGS = 2; if constexpr (D == Dim::_2D) { - this_i2 = species.i2; - this_i2_prev = species.i2_prev; - this_dx2 = species.dx2; - this_dx2_prev = species.dx2_prev; if (C != Coord::Cart) { NREALS = 5; NINTS = 4; @@ -537,14 +533,6 @@ namespace comm { } } if constexpr (D == Dim::_3D) { - this_i2 = species.i2; - this_i2_prev = species.i2_prev; - this_dx2 = species.dx2; - this_dx2_prev = species.dx2_prev; - this_i3 = species.i3; - this_i3_prev = species.i3_prev; - this_dx3 = species.dx3; - this_dx3_prev = species.dx3_prev; NREALS = 4; NINTS = 6; NFLOATS = 6; @@ -556,6 +544,12 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + // Debug test: print send and recv count + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); + } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. 
The different physical quantities are stored next to each other @@ -568,10 +562,11 @@ namespace comm { Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); + Kokkos::View recv_buffer_long("recv_buffer_long", total_recv * NLONGS); auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); - + auto recv_buffer_long_h = Kokkos::create_mirror_view(recv_buffer_long); auto iteration = 0; auto current_received = 0; @@ -588,9 +583,11 @@ namespace comm { Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); + Kokkos::View send_buffer_long("send_buffer_long", send_count * NLONGS); auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); auto send_buffer_prtldx_h = Kokkos::create_mirror_view(send_buffer_prtldx); + auto send_buffer_long_h = Kokkos::create_mirror_view(send_buffer_long); // Need different constexpr parallel fors for different dims if constexpr(D == Dim::_1D) { @@ -607,6 +604,8 @@ namespace comm { send_buffer_real(NREALS * p + 3) = this_weight(idx); send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -628,6 +627,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = 
this_dx2_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -650,6 +651,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -675,6 +678,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -695,14 +700,22 @@ namespace comm { const auto receive_offset_int = current_received * NINTS; const auto receive_offset_real = current_received * NREALS; const auto receive_offset_prtldx = current_received * NFLOATS; + const auto receive_offset_long = current_received * NLONGS; // Comms // Make host arrays for send and recv buffers Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); + Kokkos::deep_copy(send_buffer_long_h, send_buffer_long); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + } MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -739,7 +752,25 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_long_h.data(), + send_count * NLONGS, + mpi::get_type(), + send_rank, + 0, + 
recv_buffer_long_h.data() + receive_offset_long, + recv_count*NLONGS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing send operation \n", rank); + } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -758,7 +789,19 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + MPI_Send(send_buffer_long_h.data(), + send_count * NLONGS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing recv operation \n", rank); + } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -780,9 +823,69 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_long_h.data() + receive_offset_long, + recv_count * NLONGS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } current_received += recv_count; iteration++; + + // Debug test: Print recv buffer before and after + /* + { + int total_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); + for (int allranks=0; allranks| |<----->| |<----->| .... 
- tag=0 ct tag=1 ct tag=2 ct + tag=dead ct tag=2 ct tag=3 ct */ Kokkos::View permute_vector("permute_vector", total_holes); Kokkos::View current_offset("current_offset", species.ntags()); @@ -778,6 +776,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -787,8 +786,8 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); } } }); @@ -806,6 +805,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -815,10 +815,10 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); - this_i2(p) -= shifts_in_x2(current_tag); - this_i2_prev(p) -= shifts_in_x2(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); } } }); @@ -836,6 +836,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -845,12 +846,12 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); - this_i2(p) -= shifts_in_x2(current_tag); - this_i2_prev(p) -= shifts_in_x2(current_tag); - this_i3(p) -= shifts_in_x3(current_tag); - this_i3_prev(p) -= shifts_in_x3(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + 
this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + this_i3(p) += shifts_in_x3(current_tag); + this_i3_prev(p) += shifts_in_x3(current_tag); } } }); @@ -913,6 +914,43 @@ namespace ntt { allocation_vector(p) = permute_vector(p); }); } + + /* + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 1 && species.label() == "e+_b") + { + // Copy the tag array to host + auto tag_h = Kokkos::create_mirror_view(species.tag); + Kokkos::deep_copy(tag_h, species.tag); + std::cout << "Tag locs before send" << std::endl; + for (std::size_t i { 0 }; i < species.npart(); i++) { + if (tag_h(i) != ParticleTag::alive) + std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; + } + + // Print allocation vector after copying to host + auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); + std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; + Kokkos::deep_copy(allocation_vector_h, allocation_vector); + for (std::size_t i { 0 }; i < total_recv; ++i) { + std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; + } + // Print the permute vector as well + auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); + Kokkos::deep_copy(permute_vector_h, permute_vector); + for (std::size_t i { 0 }; i < total_holes; ++i) { + std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << + " tag: " << tag_h(permute_vector_h(i)) << std::endl; + } + } + */ + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; + } + // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, diff --git a/src/framework/domain/metadomain.cpp 
b/src/framework/domain/metadomain.cpp index ec8561a9a..a01296823 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -399,6 +399,33 @@ namespace ntt { #endif } + // Function to assign a unique ID to each particle + template + void Metadomain::SetParticleIDs(Domain& domain){ + for (auto& species : domain.species) { + auto &this_particleID = species.particleID; + auto &this_tag = species.tag; + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const auto offset_per_rank = static_cast(1e9 * rank); + std::size_t current_particleID = 0; + Kokkos::View counter_view("current_particleID", 1); + Kokkos::deep_copy(counter_view, current_particleID); + + Kokkos::parallel_for( + "Set Particle IDs", + species.npart(), + Lambda(const std::size_t p){ + if (this_tag(p) == ParticleTag::alive) + { + Kokkos::atomic_increment(&counter_view(0)); + this_particleID(p) = offset_per_rank + static_cast(counter_view(0)); + } + }); + } + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index e30bc8e97..9e94bf89f 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -90,6 +90,7 @@ namespace ntt { void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); + void SetParticleIDs(Domain&); /** * @param global_ndomains total number of domains From 0d18e5c07cdd6b9cb336b9ec976d032003eae687 Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Sat, 4 Jan 2025 18:18:47 -0500 Subject: [PATCH 067/124] test of the number of particles --- extern/adios2 | 2 +- setups/srpic/blob/blob.py | 62 +++++++++++++++++++++ setups/srpic/blob/blob.toml | 66 +++++++++++++++++++++++ setups/srpic/blob/nparts.py | 38 +++++++++++++ setups/srpic/blob/pgen.hpp | 104 
++++++++++++++++++++++++++++++++++++ 5 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 setups/srpic/blob/blob.py create mode 100644 setups/srpic/blob/blob.toml create mode 100644 setups/srpic/blob/nparts.py create mode 100644 setups/srpic/blob/pgen.hpp diff --git a/extern/adios2 b/extern/adios2 index b8761e2af..f80ad829d 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 +Subproject commit f80ad829d751241140c40923503e1888e27e22e1 diff --git a/setups/srpic/blob/blob.py b/setups/srpic/blob/blob.py new file mode 100644 index 000000000..77337d3b2 --- /dev/null +++ b/setups/srpic/blob/blob.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + +f = h5py.File('blob.h5', 'r') + +Nsteps = len(f.keys()) +print(list(f['Step0'].keys())) + +for i in range (Nsteps): + print (i) + fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + + densMax = max(np.max(f['Step'+str(i)]['fN_1']),np.max(f['Step'+str(i)]['fN_2'])) + print(densMax) + ax1 = fig.add_axes([0.05,0.05,0.4,0.4]) + im1=ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_1'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_title(r"$N_1$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax1 = fig.add_axes([0.48,0.05,0.4,0.4]) + ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_2'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_yticklabels([]) + ax1.set_title(r"$N_2$") + 
ax1.vlines(0,-10.0,10.0,color='white') + + ax4cb = fig.add_axes([0.89, 0.05, 0.01, 0.4]) + cbar4 = fig.colorbar(im1,cax=ax4cb) + + ax1= fig.add_axes([0.05,0.5,0.83,0.4]) + ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') + ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') + ax1.legend() + ax1.set_ylim(0,1.8e5) + ax1.set_xlim(0,100) + ax1.vlines(i, 0,1.8e5, color='green',linewidth=0.6) + + + fig.savefig("%05d"%i+".png",dpi=300,bbox_inches='tight') + plt.close() diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml new file mode 100644 index 000000000..7c03b1f9e --- /dev/null +++ b/setups/srpic/blob/blob.toml @@ -0,0 +1,66 @@ +[simulation] + name = "blob" + engine = "srpic" + runtime = 100.0 + + [simulation.domain] + decomposition = [2,1,1] + +[grid] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 16.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + +[setup] + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + v_max = 50.0 + dr = 1.0 + +[output] + format = "hdf5" + interval_time = 1.0 + + [output.fields] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/srpic/blob/nparts.py b/setups/srpic/blob/nparts.py new file mode 100644 index 000000000..e759422c0 --- /dev/null +++ b/setups/srpic/blob/nparts.py @@ -0,0 +1,38 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] 
+time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + + +fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + +ax1= fig.add_axes([0.05,0.5,0.83,0.4]) +ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') +ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') +ax1.legend() +ax1.set_ylim(0,1.8e5) +ax1.set_xlim(0,100) + +fig.savefig("nparts.png",dpi=300,bbox_inches='tight') +plt.close() diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp new file mode 100644 index 000000000..38b3db1c5 --- /dev/null +++ b/setups/srpic/blob/pgen.hpp @@ -0,0 +1,104 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; + } + + private: + const real_t v_max; + }; + + template + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be 
normalized to 1 + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr){ + return 1.0; + }else{ + return 0.0; + } + } + + private: + const real_t x1c, x2c, dr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist( + local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( + params, + local_domain, + injector, + 1.0); + } + }; + +} // namespace user + +#endif From d7f92f0ae4ea7a7c4407625c8822f417eb8810d1 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 5 Jan 2025 00:37:22 -0500 Subject: [PATCH 068/124] added function to remove dead particles --- src/engines/srpic.hpp | 8 + src/framework/domain/comm_mpi.hpp | 114 +-------- src/framework/domain/communications.cpp | 306 ++++++++++++++++++++---- src/framework/domain/metadomain.h | 1 + 4 files changed, 275 insertions(+), 154 deletions(-) diff --git 
a/src/engines/srpic.hpp b/src/engines/srpic.hpp index d751c712a..686ed0c35 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,6 +175,14 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (step % 10 == 0 && step > 0){ + + timers.start("RemoveDead"); + m_metadomain.RemoveDeadParticles(dom, &timers); + timers.stop("RemoveDead"); + } + } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 82308e107..7b9a22eee 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -544,12 +544,6 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; - // Debug test: print send and recv count - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); - } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. 
The different physical quantities are stored next to each other @@ -710,12 +704,6 @@ namespace comm { if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation \n", rank); - } MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -765,12 +753,6 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation \n", rank); - } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -796,12 +778,6 @@ namespace comm { 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing recv operation \n", rank); - } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -833,59 +809,8 @@ namespace comm { } current_received += recv_count; iteration++; + - // Debug test: Print recv buffer before and after - /* - { - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks current_offset("current_offset", species.ntags()); auto &this_tag_offset = tag_offset; - auto n_alive = npart_per_tag_arr[ParticleTag::alive]; - if constexpr (D == Dim::_1D){ Kokkos::parallel_for( "PermuteVector and Displace", @@ -781,7 +779,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -810,7 +808,7 @@ namespace ntt { // tag = 1->N (excluding dead and 
alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -841,7 +839,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -915,42 +913,6 @@ namespace ntt { }); } - /* - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank == 1 && species.label() == "e+_b") - { - // Copy the tag array to host - auto tag_h = Kokkos::create_mirror_view(species.tag); - Kokkos::deep_copy(tag_h, species.tag); - std::cout << "Tag locs before send" << std::endl; - for (std::size_t i { 0 }; i < species.npart(); i++) { - if (tag_h(i) != ParticleTag::alive) - std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; - } - - // Print allocation vector after copying to host - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - for (std::size_t i { 0 }; i < total_recv; ++i) { - std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; - } - // Print the permute vector as well - auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); - Kokkos::deep_copy(permute_vector_h, permute_vector); - for (std::size_t i { 0 }; i < total_holes; ++i) { - std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << - " tag: " << tag_h(permute_vector_h(i)) << std::endl; - } - } - */ - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; - } - // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, this_tag_offset, 
npart_per_tag_arr, npart_per_tag_arr_recv, @@ -959,6 +921,268 @@ namespace ntt { } } + /* + Function to remove dead particles from the domain + */ + template + void Metadomain::RemoveDeadParticles(Domain& domain, + timer::Timers* timers){ + MPI_Barrier(MPI_COMM_WORLD); + for (auto& species : domain.species) { + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + + if (total_dead != 0){ + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); + } + } + + // Get the indices of all alive particles + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_i1 = species.i1; + auto &this_i1_prev = species.i1_prev; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_tag = species.tag; + + // Create buffers to store alive particles + Kokkos::View buffer_ctr("buffer_ctr", 1); + Kokkos::View buffer_int("buffer_int", total_alive); + Kokkos::View buffer_real("buffer_real", total_alive); + Kokkos::View buffer_prtldx("buffer_prtldx",total_alive); + + // Simulaneously update i1, u1, dx1 + Kokkos::parallel_for( + "CopyToBuffer i1 u1 dx1", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ 
+ const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i1(p); + buffer_real(idx) = this_ux1(p); + buffer_prtldx(idx) = this_dx1(p); + } + }); + + Kokkos::parallel_for( + "i1 u1 dx1 from Buffer", + total_alive, + Lambda(index_t p) { + this_i1(p) = buffer_int(p); + this_ux1(p) = buffer_real(p); + this_dx1(p) = buffer_prtldx(p); + }); + + // Update i1_prev, dx1_prev, u2 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i1_prev dx1_prev u2", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_ux2(p); + buffer_prtldx(idx) = this_dx1_prev(p); + buffer_int(idx) = this_i1_prev(p); + } + }); + + Kokkos::parallel_for( + "i1_prev u2 dx1_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i1_prev(p) = buffer_int(p); + this_ux2(p) = buffer_real(p); + this_dx1_prev(p) = buffer_prtldx(p); + }); + + // Update u3 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer u3", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_ux3(p); + } + }); + + Kokkos::parallel_for( + "u3 from Buffer", + total_alive, + Lambda(index_t p) { + this_ux3(p) = buffer_real(p); + }); + + + // Update weight + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer weight", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_weight(p); + } + }); + + Kokkos::parallel_for( + "weight from Buffer", + total_alive, + Lambda(index_t p) { + this_weight(p) = buffer_real(p); + }); + + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + // i2, dx2 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i2 dx2", + 
total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i2(p); + buffer_prtldx(idx) = this_dx2(p); + } + }); + + Kokkos::parallel_for( + "i2 dx2 from Buffer", + total_alive, + Lambda(index_t p) { + this_i2(p) = buffer_int(p); + this_dx2(p) = buffer_prtldx(p); + }); + + // i2_prev, dx2_prev + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i2_prev dx2_prev", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i2_prev(p); + buffer_prtldx(idx) = this_dx2_prev(p); + } + }); + + Kokkos::parallel_for( + "i2_prev dx2_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i2_prev(p) = buffer_int(p); + this_dx2_prev(p) = buffer_prtldx(p); + }); + + } + + // Update i3, dx3, i3_prev, dx3_prev + if constexpr(D == Dim::_3D){ + // i3, dx3 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i3 dx3", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i3(p); + buffer_prtldx(idx) = this_dx3(p); + } + }); + + Kokkos::parallel_for( + "i3 dx3 from Buffer", + total_alive, + Lambda(index_t p) { + this_i3(p) = buffer_int(p); + this_dx3(p) = buffer_prtldx(p); + }); + + // i3_prev, dx3_prev + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i3_prev dx3_prev", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i3_prev(p); + buffer_prtldx(idx) = this_dx3_prev(p); + } + }); + + Kokkos::parallel_for( + "i3_prev dx3_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i3_prev(p) = buffer_int(p); + this_dx3_prev(p) = buffer_prtldx(p); + }); + } + + // 
phi + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer phi", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_phi(p); + } + }); + + Kokkos::parallel_for( + "phi from Buffer", + total_alive, + Lambda(index_t p) { + this_phi(p) = buffer_real(p); + }); + + } + + // tags + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + species.set_npart(total_alive); + } + } + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e94bf89f..6bd3d29d8 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,6 +91,7 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); + void RemoveDeadParticles(Domain&, timer::Timers* ); /** * @param global_ndomains total number of domains From 39f6c9f64f5dd9f5df34734036277ecf81a57c84 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 5 Jan 2025 23:44:32 -0500 Subject: [PATCH 069/124] testing: remove dead prtls --- src/engines/srpic.hpp | 2 +- src/framework/domain/comm_mpi.hpp | 27 ++ src/framework/domain/communications.cpp | 348 ++++++++---------------- 3 files changed, 138 insertions(+), 239 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 686ed0c35..b54327076 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -176,7 +176,7 @@ namespace ntt { timers.stop("Injector"); } - if (step % 10 == 0 && step > 0){ + if (step % 100 == 0 && step > 0){ timers.start("RemoveDead"); m_metadomain.RemoveDeadParticles(dom, &timers); diff --git 
a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 7b9a22eee..aa35ce2a6 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -918,6 +918,33 @@ namespace comm { return; } +/* + Function to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + + return; + } + + } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 433a97fb8..ee0d37f10 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -927,7 +927,6 @@ namespace ntt { template void Metadomain::RemoveDeadParticles(Domain& domain, timer::Timers* timers){ - MPI_Barrier(MPI_COMM_WORLD); for (auto& species : domain.species) { auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); @@ -936,250 +935,123 @@ namespace ntt { npart_per_tag_arr[ParticleTag::alive]); auto total_dead = static_cast( npart_per_tag_arr[ParticleTag::dead]); - - if (total_dead != 0){ - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } - - // Get the indices of all alive particles - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = 
species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; - - // Create buffers to store alive particles - Kokkos::View buffer_ctr("buffer_ctr", 1); - Kokkos::View buffer_int("buffer_int", total_alive); - Kokkos::View buffer_real("buffer_real", total_alive); - Kokkos::View buffer_prtldx("buffer_prtldx",total_alive); - - // Simulaneously update i1, u1, dx1 - Kokkos::parallel_for( - "CopyToBuffer i1 u1 dx1", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i1(p); - buffer_real(idx) = this_ux1(p); - buffer_prtldx(idx) = this_dx1(p); - } - }); - - Kokkos::parallel_for( - "i1 u1 dx1 from Buffer", - total_alive, - Lambda(index_t p) { - this_i1(p) = buffer_int(p); - this_ux1(p) = buffer_real(p); - this_dx1(p) = buffer_prtldx(p); - }); - - // Update i1_prev, dx1_prev, u2 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i1_prev dx1_prev u2", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_ux2(p); - buffer_prtldx(idx) = this_dx1_prev(p); - buffer_int(idx) = this_i1_prev(p); - } - }); - - Kokkos::parallel_for( - "i1_prev u2 dx1_prev from Buffer", - total_alive, - Lambda(index_t p) { - this_i1_prev(p) = buffer_int(p); - this_ux2(p) = buffer_real(p); - this_dx1_prev(p) = buffer_prtldx(p); - }); - - // Update u3 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer u3", - total_alive, 
- Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_ux3(p); - } - }); - - Kokkos::parallel_for( - "u3 from Buffer", - total_alive, - Lambda(index_t p) { - this_ux3(p) = buffer_real(p); - }); - - - // Update weight - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer weight", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_weight(p); - } - }); - - Kokkos::parallel_for( - "weight from Buffer", - total_alive, - Lambda(index_t p) { - this_weight(p) = buffer_real(p); - }); - - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - // i2, dx2 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i2 dx2", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i2(p); - buffer_prtldx(idx) = this_dx2(p); - } - }); - - Kokkos::parallel_for( - "i2 dx2 from Buffer", - total_alive, - Lambda(index_t p) { - this_i2(p) = buffer_int(p); - this_dx2(p) = buffer_prtldx(p); - }); - - // i2_prev, dx2_prev - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i2_prev dx2_prev", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i2_prev(p); - buffer_prtldx(idx) = this_dx2_prev(p); - } - }); - - Kokkos::parallel_for( - "i2_prev dx2_prev from Buffer", - total_alive, - Lambda(index_t p) { - this_i2_prev(p) = buffer_int(p); - this_dx2_prev(p) = buffer_prtldx(p); - }); + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + 
raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); } - - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - // i3, dx3 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i3 dx3", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i3(p); - buffer_prtldx(idx) = this_dx3(p); - } - }); - - Kokkos::parallel_for( - "i3 dx3 from Buffer", - total_alive, - Lambda(index_t p) { - this_i3(p) = buffer_int(p); - this_dx3(p) = buffer_prtldx(p); - }); - - // i3_prev, dx3_prev - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i3_prev dx3_prev", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i3_prev(p); - buffer_prtldx(idx) = this_dx3_prev(p); + } + { + int rank, totranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &totranks); + for (std::size_t current_rank=0; current_rank indices_alive("indices_alive", total_alive); + Kokkos::View alive_counter("counter_alive", 1); + Kokkos::deep_copy(alive_counter, 0); + Kokkos::parallel_for( + "Indices of Alive Particles", + species.npart(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; } - - // tags - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - species.set_npart(total_alive); + }); + // Sanity check: alive_counter must be equal to total_alive + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::FatalIf(alive_counter_h(0) != total_alive, + "Error in finding alive particles", + HERE); + 
comm::MoveDeadToEnd(species.i1, indices_alive); + comm::MoveDeadToEnd(species.i1_prev, indices_alive); + comm::MoveDeadToEnd(species.dx1_prev, indices_alive); + comm::MoveDeadToEnd(species.ux1, indices_alive); + comm::MoveDeadToEnd(species.ux2, indices_alive); + comm::MoveDeadToEnd(species.ux3, indices_alive); + comm::MoveDeadToEnd(species.weight, indices_alive); + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + comm::MoveDeadToEnd(species.i2, indices_alive); + comm::MoveDeadToEnd(species.i2_prev, indices_alive); + comm::MoveDeadToEnd(species.dx2, indices_alive); + comm::MoveDeadToEnd(species.dx2_prev, indices_alive); + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + comm::MoveDeadToEnd(species.phi, indices_alive); + } } + // Update i3, dx3, i3_prev, dx3_prev + if constexpr(D == Dim::_3D){ + comm::MoveDeadToEnd(species.i3, indices_alive); + comm::MoveDeadToEnd(species.i3_prev, indices_alive); + comm::MoveDeadToEnd(species.dx3, indices_alive); + comm::MoveDeadToEnd(species.dx3_prev, indices_alive); + } + // tags + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + species.set_npart(total_alive); + + + int rank, totranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &totranks); + for (std::size_t current_rank=0; current_rank Date: Mon, 6 Jan 2025 01:39:18 -0500 Subject: [PATCH 070/124] testing removedeadparticles() --- src/framework/domain/communications.cpp | 79 +++++++++++++++++++++---- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index ee0d37f10..6450aa4b7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -923,6 +923,33 @@ namespace ntt { /* Function to remove dead particles from the domain + + Consider the following particle quantity array + 
<---xxx---x---xx---xx-----------xx----x--> (qty) + - = alive + x = dead + ntot = nalive + ndead + + (1) Copy all alive particle data to buffer + <---xxx---x---xx---xx-----------xx----x--> (qty) + | + | + v + <--------------------------> buffer + (nalive) + + (2) Copy from buffer to the beginning of the array + overwritting all particles + <--------------------------> buffer + (nalive) + | + | + v + <--------------------------xx----x--> (qty) + ^ + (nalive) + + (3) Set npart to nalive */ template void Metadomain::RemoveDeadParticles(Domain& domain, @@ -930,11 +957,11 @@ namespace ntt { for (auto& species : domain.species) { auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); + const auto npart = static_cast(species.npart()); + const auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + const auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); // Check that only alive and dead particles are present for (std::size_t i { 0 }; i < species.ntags(); i++) { @@ -945,6 +972,14 @@ namespace ntt { } } { + auto [npart_per_tag_arr_, + tag_offset_] = species.npart_per_tag(); + auto npart_ = static_cast(species.npart()); + auto total_alive_ = static_cast( + npart_per_tag_arr_[ParticleTag::alive]); + auto total_dead_ = static_cast( + npart_per_tag_arr_[ParticleTag::dead]); + int rank, totranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &totranks); @@ -952,8 +987,8 @@ namespace ntt { if (rank == current_rank && species.label() == "e-_p"){ std::cout << "Before removing dead particles" << std::endl; std::cout << "Rank: " << rank << std::endl; - std::cout << "Total alive: " << total_alive << std::endl; - std::cout << "Total dead: " << total_dead << std::endl; + std::cout << "Total alive: " << total_alive_ << 
std::endl; + std::cout << "Total dead: " << total_dead_ << std::endl; std::cout << "Total particles: " << npart << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; @@ -1000,6 +1035,7 @@ namespace ntt { raise::FatalIf(alive_counter_h(0) != total_alive, "Error in finding alive particles", HERE); + comm::MoveDeadToEnd(species.i1, indices_alive); comm::MoveDeadToEnd(species.i1_prev, indices_alive); comm::MoveDeadToEnd(species.dx1_prev, indices_alive); @@ -1024,25 +1060,41 @@ namespace ntt { comm::MoveDeadToEnd(species.dx3, indices_alive); comm::MoveDeadToEnd(species.dx3_prev, indices_alive); } - // tags + // tags (set first total_alive to alive and rest to dead) Kokkos::parallel_for( "Make tags alive", total_alive, Lambda(index_t p) { this_tag(p) = ParticleTag::alive; }); + + Kokkos::parallel_for( + "Make tags dead", + total_dead, + Lambda(index_t p) { + this_tag(total_alive + p) = ParticleTag::dead; + }); + species.set_npart(total_alive); - - + + { + auto [npart_per_tag_arr_, + tag_offset_] = species.npart_per_tag(); + auto npart_ = static_cast(species.npart()); + auto total_alive_ = static_cast( + npart_per_tag_arr_[ParticleTag::alive]); + auto total_dead_ = static_cast( + npart_per_tag_arr_[ParticleTag::dead]); + int rank, totranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &totranks); for (std::size_t current_rank=0; current_rank Date: Mon, 6 Jan 2025 23:17:18 -0500 Subject: [PATCH 071/124] fixed dead particle removal bug --- src/engines/srpic.hpp | 5 +- src/framework/domain/comm_mpi.hpp | 27 ------ src/framework/domain/communications.cpp | 112 ++++++++++++++++-------- 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index b54327076..fd1ca226a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -176,11 +176,12 @@ namespace ntt { timers.stop("Injector"); } - if (step % 100 
== 0 && step > 0){ - + if (step % 1 == 0 && step > 0){ + MPI_Barrier(MPI_COMM_WORLD); timers.start("RemoveDead"); m_metadomain.RemoveDeadParticles(dom, &timers); timers.stop("RemoveDead"); + MPI_Barrier(MPI_COMM_WORLD); } } diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index aa35ce2a6..7b9a22eee 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -918,33 +918,6 @@ namespace comm { return; } -/* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - - return; - } - - } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 6450aa4b7..9f7b088f0 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -921,6 +921,32 @@ namespace ntt { } } + +/* + Function to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + return; + } + /* Function to remove dead particles from the domain @@ -989,33 +1015,33 @@ namespace ntt { 
std::cout << "Rank: " << rank << std::endl; std::cout << "Total alive: " << total_alive_ << std::endl; std::cout << "Total dead: " << total_dead_ << std::endl; - std::cout << "Total particles: " << npart << std::endl; + std::cout << "Total particles: " << npart_ << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { - std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; + std::cout << "Tag: " << i << " count: " << npart_per_tag_arr_[i] << std::endl; } } MPI_Barrier(MPI_COMM_WORLD); } } // Get the indices of all alive particles - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i1_prev = species.i1_prev; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_tag = species.tag; // Find indices of tag = alive particles Kokkos::View indices_alive("indices_alive", total_alive); Kokkos::View alive_counter("counter_alive", 1); @@ -1036,29 +1062,29 
@@ namespace ntt { "Error in finding alive particles", HERE); - comm::MoveDeadToEnd(species.i1, indices_alive); - comm::MoveDeadToEnd(species.i1_prev, indices_alive); - comm::MoveDeadToEnd(species.dx1_prev, indices_alive); - comm::MoveDeadToEnd(species.ux1, indices_alive); - comm::MoveDeadToEnd(species.ux2, indices_alive); - comm::MoveDeadToEnd(species.ux3, indices_alive); - comm::MoveDeadToEnd(species.weight, indices_alive); + MoveDeadToEnd(species.i1, indices_alive); + MoveDeadToEnd(species.dx1, indices_alive); + MoveDeadToEnd(species.dx1_prev, indices_alive); + MoveDeadToEnd(species.ux1, indices_alive); + MoveDeadToEnd(species.ux2, indices_alive); + MoveDeadToEnd(species.ux3, indices_alive); + MoveDeadToEnd(species.weight, indices_alive); // Update i2, dx2, i2_prev, dx2_prev if constexpr(D == Dim::_2D || D == Dim::_3D){ - comm::MoveDeadToEnd(species.i2, indices_alive); - comm::MoveDeadToEnd(species.i2_prev, indices_alive); - comm::MoveDeadToEnd(species.dx2, indices_alive); - comm::MoveDeadToEnd(species.dx2_prev, indices_alive); + MoveDeadToEnd(species.i2, indices_alive); + MoveDeadToEnd(species.i2_prev, indices_alive); + MoveDeadToEnd(species.dx2, indices_alive); + MoveDeadToEnd(species.dx2_prev, indices_alive); if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - comm::MoveDeadToEnd(species.phi, indices_alive); + MoveDeadToEnd(species.phi, indices_alive); } } // Update i3, dx3, i3_prev, dx3_prev if constexpr(D == Dim::_3D){ - comm::MoveDeadToEnd(species.i3, indices_alive); - comm::MoveDeadToEnd(species.i3_prev, indices_alive); - comm::MoveDeadToEnd(species.dx3, indices_alive); - comm::MoveDeadToEnd(species.dx3_prev, indices_alive); + MoveDeadToEnd(species.i3, indices_alive); + MoveDeadToEnd(species.i3_prev, indices_alive); + MoveDeadToEnd(species.dx3, indices_alive); + MoveDeadToEnd(species.dx3_prev, indices_alive); } // tags (set first total_alive to alive and rest to dead) Kokkos::parallel_for( @@ -1077,6 +1103,16 @@ namespace ntt { 
species.set_npart(total_alive); + std::tie(npart_per_tag_arr, + tag_offset) = species.npart_per_tag(); + raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, + "Error in removing dead particles: alive count doesn't match", + HERE); + raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, + "Error in removing dead particles: not all particles are dead", + HERE); + + { auto [npart_per_tag_arr_, tag_offset_] = species.npart_per_tag(); @@ -1095,9 +1131,9 @@ namespace ntt { std::cout << "Rank: " << rank << std::endl; std::cout << "Total alive: " << total_alive_ << std::endl; std::cout << "Total dead: " << total_dead_ << std::endl; - std::cout << "Total particles: " << npart << std::endl; + std::cout << "Total particles: " << npart_ << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { - std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; + std::cout << "Tag: " << i << " count: " << npart_per_tag_arr_[i] << std::endl; } } MPI_Barrier(MPI_COMM_WORLD); From c719c1f8bcee71d9300ed7b537f2a7b6e2a30f70 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 06:59:25 -0500 Subject: [PATCH 072/124] print mpi ranks during sendrecv --- src/framework/domain/comm_mpi.hpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 7b9a22eee..cb3e18caa 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -564,6 +564,31 @@ namespace comm { auto iteration = 0; auto current_received = 0; + + { + // For debugging purposes + // Loop over all mpi processes + int rank, maxranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &maxranks); + for (auto i = 0; i < maxranks; ++i) { + MPI_Barrier(MPI_COMM_WORLD); + if (rank == i) { + for (auto &direction : dir::Directions::all){ + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + 
const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto send_count = npart_per_tag_arr[tag_send]; + const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + printf("Current MPI rank %d, send rank %d recv rank %d, ", rank, + send_rank, recv_rank); + printf("send count %d, recv count %d\n", send_count, recv_count); + } + } + } + } + + for (auto& direction : dir::Directions::all) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; From e43b7162d2f3e8d76df28d841140d75005d66d4e Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 11:53:24 -0500 Subject: [PATCH 073/124] changed communications in fields --- src/framework/domain/comm_mpi.hpp | 216 +++++++++++-------- src/framework/domain/communications.cpp | 265 ++++-------------------- 2 files changed, 173 insertions(+), 308 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index cb3e18caa..1395c8191 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -182,16 +182,13 @@ namespace comm { } } - auto send_fld_h = Kokkos::create_mirror_view(send_fld); - auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); - Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - MPI_Sendrecv(send_fld_h.data(), + MPI_Sendrecv(send_fld.data(), nsend, mpi::get_type(), send_rank, 0, - recv_fld_h.data(), + recv_fld.data(), nrecv, mpi::get_type(), recv_rank, @@ -199,7 +196,7 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (send_rank >= 0) { - MPI_Send(send_fld_h.data(), + MPI_Send(send_fld.data(), nsend, mpi::get_type(), send_rank, @@ -207,8 +204,7 @@ namespace comm { MPI_COMM_WORLD); } else if (recv_rank >= 0) { - auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); - MPI_Recv(recv_fld_h.data(), + MPI_Recv(recv_fld.data(), nrecv, mpi::get_type(), recv_rank, @@ -218,7 +214,6 @@ namespace comm { } else { raise::Error("CommunicateField 
called with negative ranks", HERE); } - Kokkos::deep_copy(recv_fld, recv_fld_h); if (recv_rank >= 0) { @@ -297,67 +292,35 @@ namespace comm { const range_tuple_t& recv_slice) { const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; - // Make arrays on the host - auto send_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, send_slice)); - Kokkos::deep_copy(send_arr_h, Kokkos::subview(arr, send_slice)); - auto recv_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, recv_slice)); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(send_arr_h.data(), + MPI_Sendrecv(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - recv_arr_h.data(), + arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - //MPI_Sendrecv(arr.data() + send_slice.first, - // send_count, - // mpi::get_type(), - // send_rank, - // 0, - // arr.data() + recv_slice.first, - // recv_count, - // mpi::get_type(), - // recv_rank, - // 0, - // MPI_COMM_WORLD, - // MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send( send_arr_h.data(), - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - //MPI_Send(arr.data() + send_slice.first, - // send_count, - // mpi::get_type(), - // send_rank, - // 0, - // MPI_COMM_WORLD); + MPI_Send(arr.data() + send_slice.first, + send_count, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv( recv_arr_h.data(), - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - //MPI_Recv(arr.data() + recv_slice.first, - // recv_count, - // mpi::get_type(), - // recv_rank, - // 0, - // MPI_COMM_WORLD, - // MPI_STATUS_IGNORE); - } - if ((recv_rank >= 0) and (recv_count > 0)) { - Kokkos::deep_copy(Kokkos::subview(arr, recv_slice), 
recv_arr_h); + MPI_Recv(arr.data() + recv_slice.first, + recv_count, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } } @@ -544,6 +507,12 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + // Debug test: print send and recv count + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); + } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. The different physical quantities are stored next to each other @@ -564,31 +533,6 @@ namespace comm { auto iteration = 0; auto current_received = 0; - - { - // For debugging purposes - // Loop over all mpi processes - int rank, maxranks; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &maxranks); - for (auto i = 0; i < maxranks; ++i) { - MPI_Barrier(MPI_COMM_WORLD); - if (rank == i) { - for (auto &direction : dir::Directions::all){ - const auto send_rank = send_ranks[iteration]; - const auto recv_rank = recv_ranks[iteration]; - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); - const auto send_count = npart_per_tag_arr[tag_send]; - const auto recv_count = npart_per_tag_arr_recv[tag_recv]; - printf("Current MPI rank %d, send rank %d recv rank %d, ", rank, - send_rank, recv_rank); - printf("send count %d, recv count %d\n", send_count, recv_count); - } - } - } - } - - for (auto& direction : dir::Directions::all) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; @@ -729,6 +673,12 @@ namespace comm { if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + } 
MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -778,6 +728,12 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing send operation \n", rank); + } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -803,6 +759,12 @@ namespace comm { 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing recv operation \n", rank); + } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -834,8 +796,59 @@ namespace comm { } current_received += recv_count; iteration++; - + // Debug test: Print recv buffer before and after + /* + { + int total_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); + for (int allranks=0; allranks current_offset("current_offset", species.ntags()); auto &this_tag_offset = tag_offset; + auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + if constexpr (D == Dim::_1D){ Kokkos::parallel_for( "PermuteVector and Displace", @@ -779,7 +781,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -808,7 +810,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -839,7 +841,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); 
@@ -913,235 +915,48 @@ namespace ntt { }); } - // Communicate the arrays - comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, - this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks); -#endif - } - } - - -/* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - return; - } - - /* - Function to remove dead particles from the domain - - Consider the following particle quantity array - <---xxx---x---xx---xx-----------xx----x--> (qty) - - = alive - x = dead - ntot = nalive + ndead - - (1) Copy all alive particle data to buffer - <---xxx---x---xx---xx-----------xx----x--> (qty) - | - | - v - <--------------------------> buffer - (nalive) - - (2) Copy from buffer to the beginning of the array - overwritting all particles - <--------------------------> buffer - (nalive) - | - | - v - <--------------------------xx----x--> (qty) - ^ - (nalive) - - (3) Set npart to nalive - */ - template - void Metadomain::RemoveDeadParticles(Domain& domain, - timer::Timers* timers){ - for (auto& species : domain.species) { - auto [npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - const auto npart = static_cast(species.npart()); - const auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - const auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != 
ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } + /* + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 1 && species.label() == "e+_b") { - auto [npart_per_tag_arr_, - tag_offset_] = species.npart_per_tag(); - auto npart_ = static_cast(species.npart()); - auto total_alive_ = static_cast( - npart_per_tag_arr_[ParticleTag::alive]); - auto total_dead_ = static_cast( - npart_per_tag_arr_[ParticleTag::dead]); + // Copy the tag array to host + auto tag_h = Kokkos::create_mirror_view(species.tag); + Kokkos::deep_copy(tag_h, species.tag); + std::cout << "Tag locs before send" << std::endl; + for (std::size_t i { 0 }; i < species.npart(); i++) { + if (tag_h(i) != ParticleTag::alive) + std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; + } - int rank, totranks; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &totranks); - for (std::size_t current_rank=0; current_rank indices_alive("indices_alive", total_alive); - Kokkos::View alive_counter("counter_alive", 1); - Kokkos::deep_copy(alive_counter, 0); - Kokkos::parallel_for( - "Indices of Alive Particles", - species.npart(), - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); - indices_alive(idx) = p; + // Print the permute vector as well + auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); + Kokkos::deep_copy(permute_vector_h, permute_vector); + for (std::size_t i { 0 }; i < total_holes; ++i) { + std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << + " tag: " << tag_h(permute_vector_h(i)) << std::endl; } - }); - // Sanity check: alive_counter must be equal to total_alive - auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); - Kokkos::deep_copy(alive_counter_h, alive_counter); - raise::FatalIf(alive_counter_h(0) != total_alive, - "Error in finding alive 
particles", - HERE); - - MoveDeadToEnd(species.i1, indices_alive); - MoveDeadToEnd(species.dx1, indices_alive); - MoveDeadToEnd(species.dx1_prev, indices_alive); - MoveDeadToEnd(species.ux1, indices_alive); - MoveDeadToEnd(species.ux2, indices_alive); - MoveDeadToEnd(species.ux3, indices_alive); - MoveDeadToEnd(species.weight, indices_alive); - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - MoveDeadToEnd(species.i2, indices_alive); - MoveDeadToEnd(species.i2_prev, indices_alive); - MoveDeadToEnd(species.dx2, indices_alive); - MoveDeadToEnd(species.dx2_prev, indices_alive); - if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - MoveDeadToEnd(species.phi, indices_alive); } - } - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - MoveDeadToEnd(species.i3, indices_alive); - MoveDeadToEnd(species.i3_prev, indices_alive); - MoveDeadToEnd(species.dx3, indices_alive); - MoveDeadToEnd(species.dx3_prev, indices_alive); - } - // tags (set first total_alive to alive and rest to dead) - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - - Kokkos::parallel_for( - "Make tags dead", - total_dead, - Lambda(index_t p) { - this_tag(total_alive + p) = ParticleTag::dead; - }); - - species.set_npart(total_alive); - - std::tie(npart_per_tag_arr, - tag_offset) = species.npart_per_tag(); - raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, - "Error in removing dead particles: alive count doesn't match", - HERE); - raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, - "Error in removing dead particles: not all particles are dead", - HERE); - - - { - auto [npart_per_tag_arr_, - tag_offset_] = species.npart_per_tag(); - auto npart_ = static_cast(species.npart()); - auto total_alive_ = static_cast( - npart_per_tag_arr_[ParticleTag::alive]); - auto total_dead_ = static_cast( - npart_per_tag_arr_[ParticleTag::dead]); - - int rank, 
totranks; + */ + { + int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &totranks); - for (std::size_t current_rank=0; current_rank(species, permute_vector, allocation_vector, + this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, + send_ranks, recv_ranks); +#endif + } } template struct Metadomain>; From 42a6ea2668e31b58b0830ad6f48d4cfffa7024b8 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 12:13:53 -0500 Subject: [PATCH 074/124] small change in metadomain header --- src/engines/srpic.hpp | 9 --------- src/framework/domain/metadomain.h | 1 - 2 files changed, 10 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index fd1ca226a..d751c712a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,15 +175,6 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } - - if (step % 1 == 0 && step > 0){ - MPI_Barrier(MPI_COMM_WORLD); - timers.start("RemoveDead"); - m_metadomain.RemoveDeadParticles(dom, &timers); - timers.stop("RemoveDead"); - MPI_Barrier(MPI_COMM_WORLD); - } - } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 6bd3d29d8..9e94bf89f 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,7 +91,6 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); - void RemoveDeadParticles(Domain&, timer::Timers* ); /** * @param global_ndomains total number of domains From 530485c3e1e89053570da488eaa2bc92b9f5241e Mon Sep 17 00:00:00 2001 From: pmocz Date: Wed, 8 Jan 2025 14:08:07 -0500 Subject: [PATCH 075/124] bugfix for duplicate symbols --- src/engines/engine_printer.cpp | 16 ++++++++-------- src/engines/engine_run.cpp | 16 ++++++++-------- src/engines/engine_step_report.cpp | 16 ++++++++-------- 3 files changed, 
24 insertions(+), 24 deletions(-) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 90dec3326..1b8009618 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -415,13 +415,13 @@ namespace ntt { } } - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; } // namespace ntt diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 4485e0e40..60c5e30ab 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -92,12 +92,12 @@ namespace ntt { } } - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); } // namespace ntt diff --git a/src/engines/engine_step_report.cpp b/src/engines/engine_step_report.cpp index f2a35bb82..1681aabcc 100644 --- a/src/engines/engine_step_report.cpp +++ b/src/engines/engine_step_report.cpp @@ -226,14 +226,14 @@ namespace ntt { os << std::endl; } - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - 
template class Engine>; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; + template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; } // namespace ntt // template From a5704740678b49aa7c819754d67cb3df8ebb07b8 Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Wed, 8 Jan 2025 20:25:15 -0500 Subject: [PATCH 076/124] correct communications with boundaries --- src/framework/domain/comm_mpi.hpp | 56 +++++++++++++------------ src/framework/domain/communications.cpp | 11 ++--- src/global/utils/progressbar.cpp | 4 +- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 1395c8191..ce38a8261 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -53,7 +53,6 @@ namespace comm { (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self @@ -456,7 +455,8 @@ namespace comm { std::vector npart_per_tag_arr, std::vector npart_per_tag_arr_recv, std::vector send_ranks, - std::vector recv_ranks) { + std::vector recv_ranks, + const dir::dirs_t& legal_directions) { // Pointers to the particle data arrays auto &this_ux1 = species.ux1; 
auto &this_ux2 = species.ux2; @@ -533,13 +533,17 @@ namespace comm { auto iteration = 0; auto current_received = 0; - for (auto& direction : dir::Directions::all) { + for (const auto& direction : legal_directions) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); const auto send_count = npart_per_tag_arr[tag_send]; const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } if (send_rank < 0 and recv_rank < 0) { continue; } @@ -677,50 +681,50 @@ namespace comm { { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + //printf("MPI rank: %d, Performing sendrecv operation, direction %d \n", rank, direction); } - MPI_Sendrecv(send_buffer_int_h.data(), + MPI_Sendrecv(send_buffer_int.data(), send_count * NINTS, mpi::get_type(), send_rank, 0, - recv_buffer_int_h.data() + receive_offset_int, + recv_buffer_int.data() + receive_offset_int, recv_count*NINTS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_real_h.data(), + MPI_Sendrecv(send_buffer_real.data(), send_count * NREALS, mpi::get_type(), send_rank, 0, - recv_buffer_real_h.data() + receive_offset_real, + recv_buffer_real.data() + receive_offset_real, recv_count*NREALS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_prtldx_h.data(), + MPI_Sendrecv(send_buffer_prtldx.data(), send_count * NFLOATS, mpi::get_type(), send_rank, 0, - recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_buffer_prtldx.data() + receive_offset_prtldx, recv_count*NFLOATS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_long_h.data(), + MPI_Sendrecv(send_buffer_long.data(), send_count * NLONGS, mpi::get_type(), send_rank, 
0, - recv_buffer_long_h.data() + receive_offset_long, + recv_buffer_long.data() + receive_offset_long, recv_count*NLONGS, mpi::get_type(), recv_rank, @@ -732,61 +736,61 @@ namespace comm { { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation \n", rank); + //printf("MPI rank: %d, Performing send operation, direction %d \n", rank, direction); } - MPI_Send(send_buffer_int_h.data(), + MPI_Send(send_buffer_int.data(), send_count * NINTS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_real_h.data(), + MPI_Send(send_buffer_real.data(), send_count * NREALS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_prtldx_h.data(), + MPI_Send(send_buffer_prtldx.data(), send_count * NFLOATS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_long_h.data(), + MPI_Send(send_buffer_long.data(), send_count * NLONGS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { + } else if ((recv_rank >= 0) and (recv_count > 0)) { // Debug: Print the rank and type of mpi operation performed { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing recv operation \n", rank); + //printf("MPI rank: %d, Performing recv operation, direction %d \n", rank, direction); } - MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, + MPI_Recv(recv_buffer_int.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_real_h.data() + receive_offset_real, + MPI_Recv(recv_buffer_real.data() + receive_offset_real, recv_count * NREALS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_prtldx_h.data() + receive_offset_prtldx, + MPI_Recv(recv_buffer_prtldx.data() + receive_offset_prtldx, recv_count * NFLOATS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - 
MPI_Recv(recv_buffer_long_h.data() + receive_offset_long, + MPI_Recv(recv_buffer_long.data() + receive_offset_long, recv_count * NLONGS, mpi::get_type(), recv_rank, @@ -850,9 +854,9 @@ namespace comm { */ } // end over direction loop - Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); + /*Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); - Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h); + Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h);*/ if constexpr (D == Dim::_1D) { Kokkos::parallel_for( @@ -949,10 +953,8 @@ namespace comm { this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); }); } - species.set_npart(species.npart() + std::max(permute_vector.extent(0), allocation_vector.extent(0)) - permute_vector.extent(0)); - /* { int rank; diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index d1ce06609..dc62338f7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -686,6 +686,7 @@ namespace ntt { auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + dir::dirs_t legal_directions; // Get receive counts + displacements for (auto& direction : dir::Directions::all) { @@ -703,6 +704,7 @@ namespace ntt { const auto nsend = npart_per_tag_arr[tag_send]; std::size_t nrecv = 0; + legal_directions.push_back(direction); send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); @@ -945,16 +947,11 @@ namespace ntt { } } */ - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; - } - + // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, 
this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks); + send_ranks, recv_ranks, legal_directions); #endif } } diff --git a/src/global/utils/progressbar.cpp b/src/global/utils/progressbar.cpp index 38f65a790..74f952382 100644 --- a/src/global/utils/progressbar.cpp +++ b/src/global/utils/progressbar.cpp @@ -52,10 +52,10 @@ namespace pbar { } auto to_human_readable(long double t, const std::string& u) -> std::string { - const auto [tt, tu] = std::pair{t, u};//normalize_duration_fmt(t, u); + const auto [tt, tu] = normalize_duration_fmt(t, u); const auto t1 = static_cast(tt); const auto t2 = tt - static_cast(t1); - const auto [tt2, tu2] = std::pair{t2, tu};//normalize_duration_fmt(t2, tu); + const auto [tt2, tu2] = normalize_duration_fmt(t2, tu); return fmt::format("%d%s %d%s", t1, tu.c_str(), static_cast(tt2), tu2.c_str()); } From d63595f3a326eed5fb27268bb02af15b93f98a4a Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 8 Jan 2025 22:47:55 -0500 Subject: [PATCH 077/124] added dead particle function --- src/engines/srpic.hpp | 8 ++ src/framework/domain/communications.cpp | 170 ++++++++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index d751c712a..0489d8508 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,6 +175,14 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (step % 100 == 0 && step > 0){ + MPI_Barrier(MPI_COMM_WORLD); + timers.start("RemoveDead"); + m_metadomain.RemoveDeadParticles(dom, &timers); + timers.stop("RemoveDead"); + MPI_Barrier(MPI_COMM_WORLD); + } } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index dc62338f7..390c27fa8 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -956,6 +956,176 @@ namespace ntt { } } + /* + Function 
to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + return; + } + + /* + Function to remove dead particles from the domain + + Consider the following particle quantity array + <---xxx---x---xx---xx-----------xx----x--> (qty) + - = alive + x = dead + ntot = nalive + ndead + + (1) Copy all alive particle data to buffer + <---xxx---x---xx---xx-----------xx----x--> (qty) + | + | + v + <--------------------------> buffer + (nalive) + + (2) Copy from buffer to the beginning of the array + overwritting all particles + <--------------------------> buffer + (nalive) + | + | + v + <--------------------------xx----x--> (qty) + ^ + (nalive) + + (3) Set npart to nalive + */ + template + void Metadomain::RemoveDeadParticles(Domain& domain, + timer::Timers* timers){ + for (auto& species : domain.species) { + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + const auto npart = static_cast(species.npart()); + const auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + const auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); + } + } + + // Get the indices of all alive particles + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto 
&this_i1_prev = species.i1_prev; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_tag = species.tag; + // Find indices of tag = alive particles + Kokkos::View indices_alive("indices_alive", total_alive); + Kokkos::View alive_counter("counter_alive", 1); + Kokkos::deep_copy(alive_counter, 0); + Kokkos::parallel_for( + "Indices of Alive Particles", + species.npart(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); + // Sanity check: alive_counter must be equal to total_alive + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::FatalIf(alive_counter_h(0) != total_alive, + "Error in finding alive particles", + HERE); + + MoveDeadToEnd(species.i1, indices_alive); + MoveDeadToEnd(species.dx1, indices_alive); + MoveDeadToEnd(species.dx1_prev, indices_alive); + MoveDeadToEnd(species.ux1, indices_alive); + MoveDeadToEnd(species.ux2, indices_alive); + MoveDeadToEnd(species.ux3, indices_alive); + MoveDeadToEnd(species.weight, indices_alive); + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + MoveDeadToEnd(species.i2, indices_alive); + MoveDeadToEnd(species.i2_prev, indices_alive); + MoveDeadToEnd(species.dx2, indices_alive); + MoveDeadToEnd(species.dx2_prev, indices_alive); + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + MoveDeadToEnd(species.phi, indices_alive); + } + } + // Update i3, dx3, i3_prev, 
dx3_prev + if constexpr(D == Dim::_3D){ + MoveDeadToEnd(species.i3, indices_alive); + MoveDeadToEnd(species.i3_prev, indices_alive); + MoveDeadToEnd(species.dx3, indices_alive); + MoveDeadToEnd(species.dx3_prev, indices_alive); + } + // tags (set first total_alive to alive and rest to dead) + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + + Kokkos::parallel_for( + "Make tags dead", + total_dead, + Lambda(index_t p) { + this_tag(total_alive + p) = ParticleTag::dead; + }); + + species.set_npart(total_alive); + + std::tie(npart_per_tag_arr, + tag_offset) = species.npart_per_tag(); + raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, + "Error in removing dead particles: alive count doesn't match", + HERE); + raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, + "Error in removing dead particles: not all particles are dead", + HERE); + + } + + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; From 8fc0cd6dd2137e0ccd0b69f66ade66d3b9c12480 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 8 Jan 2025 23:28:45 -0500 Subject: [PATCH 078/124] added header --- src/framework/domain/metadomain.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e94bf89f..9e2c2bb9d 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,6 +91,7 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); + void RemoveDeadParticles(Domain& ,timer::Timers* ); /** * @param global_ndomains total number of domains From 0ef23120b66a85dc4a3a2feb6303b4eb76185e03 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:19:34 -0500 Subject: [PATCH 079/124] hdf5_root -- optionally set --- CMakeLists.txt | 11 ----------- 1 file 
changed, 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2618a0cb2..efd240993 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,23 +101,12 @@ if(${output}) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/adios2Config.cmake) find_or_fetch_dependency(adios2 FALSE) if(NOT DEFINED ENV{HDF5_ROOT}) - set(USE_CUSTOM_HDF5 OFF) if(DEFINED ENV{CONDA_PREFIX}) execute_process(COMMAND bash -c "conda list | grep \"hdf5\" -q" RESULT_VARIABLE HDF5_INSTALLED) if(HDF5_INSTALLED EQUAL 0) set(HDF5_ROOT $ENV{CONDA_PREFIX}) - else() - set(USE_CUSTOM_HDF5 ON) endif() - else() - set(USE_CUSTOM_HDF5 ON) - endif() - if(USE_CUSTOM_HDF5) - message( - FATAL_ERROR - "HDF5_ROOT is not set. Please set it to the root of the HDF5 installation" - ) endif() endif() find_package(HDF5 REQUIRED) From 16a4086ccd1f8ef8fb54fc02d2682b11bc8693c0 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:20:30 -0500 Subject: [PATCH 080/124] sort_interval -> clear_interval --- input.example.toml | 13 ++- setups/srpic/blob/blob.toml | 40 ++++---- setups/srpic/em_vacuum/em_vacuum.toml | 22 ++--- setups/srpic/langmuir/langmuir.toml | 36 +++---- setups/srpic/magnetar/magnetar.toml | 96 +++++++++---------- setups/srpic/magnetosphere/magnetosphere.toml | 58 +++++------ setups/srpic/monopole/monopole.toml | 56 +++++------ setups/srpic/shock/shock.toml | 30 +++--- setups/srpic/turbulence/turbulence.toml | 34 +++---- setups/srpic/weibel/weibel.toml | 48 +++++----- 10 files changed, 216 insertions(+), 217 deletions(-) diff --git a/input.example.toml b/input.example.toml index 5ee34d65d..2f6d2b285 100644 --- a/input.example.toml +++ b/input.example.toml @@ -105,7 +105,7 @@ # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] particles = "" - + [grid.boundaries.absorb] # Size of the absorption layer in physical (code) units: # 
@type: float @@ -119,7 +119,7 @@ coeff = "" [grid.boundaries.atmosphere] - # @required: if ATMOSPHERE is one of the boundaries + # @required: if ATMOSPHERE is one of the boundaries # Temperature of the atmosphere in units of m0 c^2 # @type: float temperature = "" @@ -210,7 +210,7 @@ # @type: float: ~1 # @default: 1.0 correction = "" - + # @inferred: # - dt [= CFL * dx0] # @brief: timestep duration @@ -252,12 +252,11 @@ # @type: bool # @default: false use_weights = "" - # Timesteps between particle re-sorting: + # Timesteps between particle re-sorting (removing dead particles): # @type: unsigned int # @default: 100 - # @note: When MPI is enable, particles are sorted every step. - # @note: When `sort_interval` == 0, the sorting is disabled. - sort_interval = "" + # @note: set to 0 to disable re-sorting + clear_interval = "" # @inferred: # - nspec diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml index 7c03b1f9e..7a047f348 100644 --- a/setups/srpic/blob/blob.toml +++ b/setups/srpic/blob/blob.toml @@ -1,24 +1,24 @@ [simulation] - name = "blob" - engine = "srpic" + name = "blob" + engine = "srpic" runtime = 100.0 [simulation.domain] - decomposition = [2,1,1] + decomposition = [2, 1, 1] [grid] resolution = [1024, 1024] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -31,26 +31,26 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - temp_1 = 1e-4 - x1c = -5.0 - x2c = 0.0 - v_max = 50.0 - dr = 1.0 - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + 
v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 1.0 [output.fields] diff --git a/setups/srpic/em_vacuum/em_vacuum.toml b/setups/srpic/em_vacuum/em_vacuum.toml index 156c8d308..23381b1c6 100644 --- a/setups/srpic/em_vacuum/em_vacuum.toml +++ b/setups/srpic/em_vacuum/em_vacuum.toml @@ -1,21 +1,21 @@ [simulation] - name = "em_vacuum" - engine = "srpic" + name = "em_vacuum" + engine = "srpic" runtime = 2.0 [grid] resolution = [256, 512] - extent = [[-1.0, 1.0], [-2.0, 2.0]] + extent = [[-1.0, 1.0], [-2.0, 2.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,12 +28,12 @@ [setup] amplitude = 1.0 - kx1 = 1 - kx2 = 1 - kx3 = 0 - + kx1 = 1 + kx2 = 1 + kx3 = 0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 [output.fields] diff --git a/setups/srpic/langmuir/langmuir.toml b/setups/srpic/langmuir/langmuir.toml index 2f3520fc5..b054a940d 100644 --- a/setups/srpic/langmuir/langmuir.toml +++ b/setups/srpic/langmuir/langmuir.toml @@ -1,21 +1,21 @@ [simulation] - name = "langmuir" - engine = "srpic" + name = "langmuir" + engine = "srpic" runtime = 1.0 [grid] resolution = [2048, 512] - extent = [[0.0, 1.0], [0.0, 0.25]] + extent = [[0.0, 1.0], [0.0, 0.25]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 14.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 
[setup] vmax = 0.1 - nx1 = 4 - nx2 = 2 - + nx1 = 4 + nx2 = 2 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.0025 [output.fields] diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 2a2260af5..fab2eb01c 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -1,17 +1,17 @@ [simulation] - name = "magnetar" - engine = "srpic" + name = "magnetar" + engine = "srpic" runtime = 50.0 [grid] - resolution = [2048,1024] - extent = [[1.0, 400.0]] + resolution = [2048, 1024] + extent = [[1.0, 400.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] @@ -19,13 +19,13 @@ [grid.boundaries.atmosphere] temperature = 0.1 - density = 40.0 - height = 0.02 - species = [1, 2] - ds = 0.5 + density = 40.0 + height = 0.02 + species = [1, 2] + ds = 0.5 [scales] - larmor0 = 1e-5 + larmor0 = 1e-5 skindepth0 = 0.01 [algorithms] @@ -36,59 +36,59 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 100.0 + larmor_max = 100.0 [particles] - ppc0 = 4.0 - use_weights = true - sort_interval = 100 + ppc0 = 4.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart 
= 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 - omega = 0.0125 - pp_thres = 10.0 + Bsurf = 1.0 + omega = 0.0125 + pp_thres = 10.0 gamma_pairs = 1.75 [output] @@ -96,7 +96,7 @@ [output.fields] interval_time = 0.5 - quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] + quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] [output.particles] enable = false diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 34e04b02d..4c7c9117d 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -1,31 +1,31 @@ [simulation] - name = "magnetosphere" - engine = "srpic" + name = "magnetosphere" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,37 +36,37 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - 
pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] + quantities = ["N_1", "N_2", "E", "B", "T00"] [output.particles] enable = false @@ -75,5 +75,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/monopole/monopole.toml b/setups/srpic/monopole/monopole.toml index 169837489..cf735fce8 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/setups/srpic/monopole/monopole.toml @@ -1,31 +1,31 @@ [simulation] - name = "monopole" - engine = "srpic" + name = "monopole" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,38 +36,38 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 
1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] - mom_smooth = 2 + quantities = ["N_1", "N_2", "E", "B", "T00"] + mom_smooth = 2 [output.particles] enable = false @@ -76,5 +76,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..7b2cdde2c 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,21 +1,21 @@ [simulation] - name = "shock" - engine = "srpic" + name = "shock" + engine = "srpic" runtime = 50.0 [grid] resolution = [2048, 128] - extent = [[0.0, 10.0], [-0.3125, 0.3125]] + extent = [[0.0, 10.0], [-0.3125, 0.3125]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] + fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] - + [scales] - larmor0 = 1e-2 + larmor0 = 1e-2 skindepth0 = 1e-2 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 [setup] - drift_ux = 0.1 + drift_ux = 0.1 temperature = 1e-3 [output] interval_time = 0.1 - format = "hdf5" - + format = "hdf5" + [output.fields] quantities = ["N_1", "N_2", "E", "B", "T0i_1", "T0i_2", "J"] diff --git a/setups/srpic/turbulence/turbulence.toml b/setups/srpic/turbulence/turbulence.toml index a28afde15..a1f8e29c1 100644 --- a/setups/srpic/turbulence/turbulence.toml +++ b/setups/srpic/turbulence/turbulence.toml @@ -1,21 +1,21 @@ [simulation] - name = "turbulence" - engine = "srpic" + name = "turbulence" + engine = "srpic" runtime = 20.0 [grid] resolution 
= [184, 184, 184] - extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.02 + larmor0 = 0.02 skindepth0 = 0.02 [algorithms] @@ -28,22 +28,22 @@ ppc0 = 32.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 [setup] - + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 - + [output.fields] quantities = ["N_1", "N_2", "E", "B", "J", "T00_1", "T00_2"] diff --git a/setups/srpic/weibel/weibel.toml b/setups/srpic/weibel/weibel.toml index c8e2506f6..23d119b24 100644 --- a/setups/srpic/weibel/weibel.toml +++ b/setups/srpic/weibel/weibel.toml @@ -1,21 +1,21 @@ [simulation] - name = "weibel" - engine = "srpic" + name = "weibel" + engine = "srpic" runtime = 100.0 [grid] resolution = [512, 512] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -28,37 +28,37 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [[particles.species]] - label = "e-_b" - mass = 1.0 - charge = -1.0 + label = "e-_b" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 
[[particles.species]] - label = "e+_b" - mass = 1.0 - charge = 1.0 + label = "e+_b" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - drift_u_1 = 0.2 - drift_u_2 = 0.2 - temp_1 = 1e-4 - temp_2 = 1e-4 - + drift_u_1 = 0.2 + drift_u_2 = 0.2 + temp_1 = 1e-4 + temp_2 = 1e-4 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.25 [output.fields] From e2644a691eab9107b9cd338f2804e82391576a80 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:21:41 -0500 Subject: [PATCH 081/124] rm old sorting + added new comm --- src/engines/engine.hpp | 6 +- src/engines/engine_printer.cpp | 4 +- src/engines/engine_run.cpp | 15 +- src/engines/srpic.hpp | 23 +- src/framework/containers/particles.cpp | 184 +++-- src/framework/containers/particles.h | 23 +- src/framework/domain/comm_mpi.hpp | 895 ++++++------------------ src/framework/domain/communications.cpp | 755 ++++---------------- src/framework/domain/domain.h | 11 +- src/framework/domain/metadomain.cpp | 27 - src/framework/domain/metadomain.h | 6 +- src/framework/domain/output.cpp | 13 +- src/framework/parameters.cpp | 19 +- src/framework/tests/parameters.cpp | 105 +-- src/global/arch/directions.h | 126 ++-- src/global/arch/kokkos_aliases.cpp | 52 +- src/global/defaults.h | 8 +- src/global/global.cpp | 12 +- src/global/global.h | 2 +- src/global/utils/diag.cpp | 11 +- src/global/utils/diag.h | 6 +- src/global/utils/timer.cpp | 13 +- src/kernels/particle_pusher_sr.hpp | 2 +- 23 files changed, 674 insertions(+), 1644 deletions(-) diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 5b7caa502..dac553dcd 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -55,10 +55,12 @@ namespace ntt { static_assert(user::PGen::is_pgen, "unrecognized problem generator"); protected: -#if MPI_ENABLED +#if defined(OUTPUT_ENABLED) + #if defined(MPI_ENABLED) adios2::ADIOS m_adios { MPI_COMM_WORLD }; -#else + #else adios2::ADIOS m_adios; + #endif #endif SimulationParams m_params; diff --git 
a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 2608ea2f6..4b6ed42d7 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -105,8 +105,8 @@ namespace ntt { color::RESET); } - auto bytes_to_human_readable(std::size_t bytes) - -> std::pair { + auto bytes_to_human_readable( + std::size_t bytes) -> std::pair { const std::vector units { "B", "KB", "MB", "GB", "TB" }; std::size_t unit_idx = 0; auto size = static_cast(bytes); diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index bec5b8652..1db2de2ca 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -26,8 +26,8 @@ namespace ntt { "CurrentFiltering", "CurrentDeposit", "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", - "Injector", "Sorting", - "Custom", "Output", + "Injector", "Custom", + "PrtlClear", "Output", "Checkpoint" }, []() { Kokkos::fence(); @@ -37,9 +37,9 @@ namespace ntt { const auto diag_interval = m_params.get( "diagnostics.interval"); - auto time_history = pbar::DurationHistory { 1000 }; - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + auto time_history = pbar::DurationHistory { 1000 }; + const auto clear_interval = m_params.template get( + "particles.clear_interval"); // main algorithm loop while (step < max_steps) { @@ -56,7 +56,8 @@ namespace ntt { }); timers.stop("Custom"); } - auto print_sorting = (sort_interval > 0 and step % sort_interval == 0); + auto print_prtl_clear = (clear_interval > 0 and + step % clear_interval == 0 and step > 0); // advance time & step time += dt; @@ -109,7 +110,7 @@ namespace ntt { m_metadomain.species_labels(), m_metadomain.l_npart_perspec(), m_metadomain.l_maxnpart_perspec(), - print_sorting, + print_prtl_clear, print_output, print_checkpoint, m_params.get("diagnostics.colored_stdout")); diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 0489d8508..b54291540 100644 --- a/src/engines/srpic.hpp +++ 
b/src/engines/srpic.hpp @@ -80,8 +80,8 @@ namespace ntt { "algorithms.toggles.fieldsolver"); const auto deposit_enabled = m_params.template get( "algorithms.toggles.deposit"); - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + const auto clear_interval = m_params.template get( + "particles.clear_interval"); if (step == 0) { // communicate fields and apply BCs on the first timestep @@ -126,15 +126,8 @@ namespace ntt { timers.stop("CurrentFiltering"); } - // Tags are assigned by now - if (step == 0){ - m_metadomain.SetParticleIDs(dom); - } - timers.start("Communications"); - if ((sort_interval > 0) and (step % sort_interval == 0)) { - m_metadomain.CommunicateParticlesBuffer(dom, &timers); - } + m_metadomain.CommunicateParticles(dom); timers.stop("Communications"); } @@ -176,12 +169,10 @@ namespace ntt { timers.stop("Injector"); } - if (step % 100 == 0 && step > 0){ - MPI_Barrier(MPI_COMM_WORLD); - timers.start("RemoveDead"); - m_metadomain.RemoveDeadParticles(dom, &timers); - timers.stop("RemoveDead"); - MPI_Barrier(MPI_COMM_WORLD); + if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { + timers.start("PrtlClear"); + m_metadomain.RemoveDeadParticles(dom); + timers.stop("PrtlClear"); } } diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 1cb63bf43..758118d6c 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -10,7 +10,9 @@ #include #include +#include +#include #include #include @@ -47,9 +49,6 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); - particleID = array_t {label + "_particleID", maxnpart}; - particleID_h = Kokkos::create_mirror_view(particleID); - for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -81,93 +80,150 @@ namespace ntt { } template - auto 
Particles::npart_per_tag() const -> std::pair, - array_t>{ + auto Particles::NpartsPerTagAndOffsets() const + -> std::pair, array_t> { auto this_tag = tag; - array_t npart_tag("npart_tags", ntags()); + const auto num_tags = ntags(); + array_t npptag("nparts_per_tag", ntags()); - // Print tag_h array - auto tag_host = Kokkos::create_mirror_view(tag); - Kokkos::deep_copy(tag_host, tag); - auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); + // count # of particles per each tag + auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); Kokkos::parallel_for( "NpartPerTag", - npart(), + rangeActiveParticles(), Lambda(index_t p) { - auto npart_tag_scatter_access = npart_tag_scatter.access(); - npart_tag_scatter_access((int)(this_tag(p))) += 1; + auto npptag_acc = npptag_scat.access(); + if (this_tag(p) < 0 || this_tag(p) >= num_tags) { + raise::KernelError(HERE, "Invalid tag value"); + } + npptag_acc(this_tag(p)) += 1; }); - Kokkos::Experimental::contribute(npart_tag, npart_tag_scatter); + Kokkos::Experimental::contribute(npptag, npptag_scat); + + // copy the count to a vector on the host + auto npptag_h = Kokkos::create_mirror_view(npptag); + Kokkos::deep_copy(npptag_h, npptag); + std::vector npptag_vec(num_tags); + for (auto t { 0u }; t < num_tags; ++t) { + npptag_vec[t] = npptag_h(t); + } - auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); - Kokkos::deep_copy(npart_tag_host, npart_tag); - array_t tag_offset("tag_offset", ntags()); - auto tag_offset_host = Kokkos::create_mirror_view(tag_offset); + // count the offsets on the host and copy to device + array_t tag_offset("tag_offset", num_tags - 3); + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - std::vector npart_tag_vec(ntags()); - for (std::size_t t { 0 }; t < ntags(); ++t) { - npart_tag_vec[t] = npart_tag_host(t); - tag_offset_host(t) = (t > 0) ? 
npart_tag_vec[t - 1] : 0; - } - for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset_host(t) += (t > 0) ? tag_offset_host(t - 1) : 0; + for (auto t { 0u }; t < num_tags - 3; ++t) { + tag_offset_h(t) = npptag_vec[t + 2] + (t > 0u ? tag_offset_h(t - 1) : 0); } - Kokkos::deep_copy(tag_offset, tag_offset_host); - return std::make_pair(npart_tag_vec, tag_offset); + Kokkos::deep_copy(tag_offset, tag_offset_h); + + return { npptag_vec, tag_offset }; } - template - auto Particles::SortByTags() -> std::vector { - if (npart() == 0 || is_sorted()) { - return npart_per_tag().first; - } - using KeyType = array_t; - using BinOp = sort::BinTag; - BinOp bin_op(ntags()); - auto slice = range_tuple_t(0, npart()); - Kokkos::BinSort Sorter(Kokkos::subview(tag, slice), bin_op, false); - Sorter.create_permute_vector(); + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); - Sorter.sort(Kokkos::subview(i1, slice)); - Sorter.sort(Kokkos::subview(dx1, slice)); - Sorter.sort(Kokkos::subview(i1_prev, slice)); - Sorter.sort(Kokkos::subview(dx1_prev, slice)); - Sorter.sort(Kokkos::subview(ux1, slice)); - Sorter.sort(Kokkos::subview(ux2, slice)); - Sorter.sort(Kokkos::subview(ux3, slice)); + Kokkos::deep_copy( + Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), + buffer); + } + + template + void Particles::RemoveDead() { + const auto n_part = npart(); + std::size_t n_alive = 0, n_dead = 0; + auto& this_tag = tag; + + Kokkos::parallel_reduce( + "CountDeadAlive", + rangeActiveParticles(), + Lambda(index_t p, std::size_t & nalive, std::size_t & ndead) { + nalive += (this_tag(p) == ParticleTag::alive); + ndead += (this_tag(p) == ParticleTag::dead); + if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { + 
raise::KernelError(HERE, "wrong particle tag"); + } + }, + n_alive, + n_dead); + + array_t indices_alive { "indices_alive", n_alive }; + array_t alive_counter { "counter_alive", 1 }; - Sorter.sort(Kokkos::subview(tag, slice)); - Sorter.sort(Kokkos::subview(weight, slice)); + Kokkos::parallel_for( + "AliveIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive) { + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); - for (unsigned short n { 0 }; n < npld(); ++n) { - Sorter.sort(Kokkos::subview(pld[n], slice)); + { + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::ErrorIf(alive_counter_h(0) != n_alive, + "error in finding alive particle indices", + HERE); } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Sorter.sort(Kokkos::subview(i2, slice)); - Sorter.sort(Kokkos::subview(dx2, slice)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i1, indices_alive); + RemoveDeadInArray(i1_prev, indices_alive); + RemoveDeadInArray(dx1, indices_alive); + RemoveDeadInArray(dx1_prev, indices_alive); + } - Sorter.sort(Kokkos::subview(i2_prev, slice)); - Sorter.sort(Kokkos::subview(dx2_prev, slice)); + if constexpr (D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i2, indices_alive); + RemoveDeadInArray(i2_prev, indices_alive); + RemoveDeadInArray(dx2, indices_alive); + RemoveDeadInArray(dx2_prev, indices_alive); } + if constexpr (D == Dim::_3D) { - Sorter.sort(Kokkos::subview(i3, slice)); - Sorter.sort(Kokkos::subview(dx3, slice)); + RemoveDeadInArray(i3, indices_alive); + RemoveDeadInArray(i3_prev, indices_alive); + RemoveDeadInArray(dx3, indices_alive); + RemoveDeadInArray(dx3_prev, indices_alive); + } + + RemoveDeadInArray(ux1, indices_alive); + RemoveDeadInArray(ux2, indices_alive); + RemoveDeadInArray(ux3, indices_alive); + RemoveDeadInArray(weight, 
indices_alive); - Sorter.sort(Kokkos::subview(i3_prev, slice)); - Sorter.sort(Kokkos::subview(dx3_prev, slice)); + if constexpr (D == Dim::_2D && C != Coord::Cart) { + RemoveDeadInArray(phi, indices_alive); } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - Sorter.sort(Kokkos::subview(phi, slice)); + for (auto& payload : pld) { + RemoveDeadInArray(payload, indices_alive); } - auto np_per_tag_tag_offset = npart_per_tag(); - const auto np_per_tag = np_per_tag_tag_offset.first; - set_npart(np_per_tag[(short)(ParticleTag::alive)]); + Kokkos::Experimental::fill( + "TagAliveParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, + std::make_pair(static_cast(0), n_alive)), + ParticleTag::alive); + + Kokkos::Experimental::fill( + "TagDeadParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), + ParticleTag::dead); + set_npart(n_alive); m_is_sorted = true; - return np_per_tag; } template diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 131ff45c0..3ae68b402 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -64,8 +64,6 @@ namespace ntt { std::vector> pld; // phi coordinate (for axisymmetry) array_t phi; - // Array to store the particle ids - array_t particleID; // host mirrors array_mirror_t i1_h, i2_h, i3_h; @@ -75,7 +73,6 @@ namespace ntt { array_mirror_t phi_h; array_mirror_t tag_h; std::vector> pld_h; - array_mirror_t particleID_h; // for empty allocation Particles() {} @@ -181,7 +178,6 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - footprint += sizeof(long) * particleID.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } @@ -191,9 +187,19 @@ namespace ntt { /** * @brief Count the number of particles with a specific tag. - * @return The vector of counts for each tag. 
+ * @return The vector of counts for each tag + offsets + * @note For instance, given the counts: 0 -> n0, 1 -> n1, 2 -> n2, 3 -> n3, + * ... it returns: + * ... [n0, n1, n2, n3, ...] of size ntags + * ... [n2, n2 + n3, n2 + n3 + n4, ...] of size ntags - 3 + * ... so in buffer array: + * ... tag=2 particles are offset by 0 + * ... tag=3 particles are offset by n2 + * ... tag=4 particles are offset by n2 + n3 + * ... etc. */ - auto npart_per_tag() const -> std::pair, array_t>; + auto NpartsPerTagAndOffsets() const + -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** @@ -216,10 +222,9 @@ namespace ntt { } /** - * @brief Sort particles by their tags. - * @return The vector of counts per each tag. + * @brief Move dead particles to the end of arrays */ - auto SortByTags() -> std::vector; + void RemoveDead(); /** * @brief Copy particle data from device to host. diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ce38a8261..370c02b18 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -24,6 +24,7 @@ #include #include +#include #include namespace comm { @@ -283,50 +284,10 @@ namespace comm { } } - template - void CommunicateParticleQuantity(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice) { - const std::size_t send_count = send_slice.second - send_slice.first; - const std::size_t recv_count = recv_slice.second - recv_slice.first; - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 
0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - } - - void ParticleSendRecvCount(int send_rank, - int recv_rank, - const std::size_t& send_count, - std::size_t& recv_count) { + void ParticleSendRecvCount(int send_rank, + int recv_rank, + std::size_t send_count, + std::size_t& recv_count) { if ((send_rank >= 0) && (recv_rank >= 0)) { MPI_Sendrecv(&send_count, 1, @@ -356,644 +317,246 @@ namespace comm { } template - auto CommunicateParticles(Particles& species, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - std::size_t& index_last) -> std::size_t { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in CommunicateParticles", HERE); - } - std::size_t recv_count { 0 }; - ParticleSendRecvCount(send_rank, - recv_rank, - send_slice.second - send_slice.first, - recv_count); - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - const auto recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantity(species.i2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx2_prev, - send_rank, - 
recv_rank, - send_slice, - recv_slice); - } - if constexpr (D == Dim::_3D) { - CommunicateParticleQuantity(species.i3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx3, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - CommunicateParticleQuantity(species.ux1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantity(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantity(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice); - } - return recv_count; - } + void CommunicateParticles(Particles& species, + Kokkos::View outgoing_indices, + Kokkos::View tag_offsets, + std::vector npptag_vec, + std::vector npptag_recv_vec, + std::vector send_ranks, + std::vector recv_ranks, + const dir::dirs_t& dirs_to_comm) { + // Pointers to the particle data arrays + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + auto& this_dx1 = species.dx1; + auto& this_dx1_prev = species.dx1_prev; + auto& this_dx2 = species.dx2; + auto& this_dx2_prev = species.dx2_prev; + auto& this_dx3 = species.dx3; + auto& this_dx3_prev = species.dx3_prev; + auto& this_phi = species.phi; + auto& this_ux1 = species.ux1; + auto& this_ux2 = 
species.ux2; + auto& this_ux3 = species.ux3; + auto& this_weight = species.weight; + auto& this_tag = species.tag; + + // number of arrays of each type to send/recv + const unsigned short NREALS = 4 + static_cast( + D == Dim::_2D and C != Coord::Cart); + const unsigned short NINTS = 2 * static_cast(D); + const unsigned short NPRTLDX = 2 * static_cast(D); + const unsigned short NPLD = species.npld(); + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + // buffers to store recv data + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + npptag_recv_vec.end(), + static_cast(0)); - template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View permute_vector, - Kokkos::View allocation_vector, - Kokkos::View tag_offset, - std::vector npart_per_tag_arr, - std::vector npart_per_tag_arr_recv, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& legal_directions) { - // Pointers to the particle data arrays - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; - auto &this_particleID = species.particleID; - - // Number of arrays of each type to send/recv - auto NREALS = 4; - auto NINTS = 2; - auto NFLOATS = 2; - auto NLONGS = 2; - if constexpr (D == Dim::_2D) { - if (C != 
Coord::Cart) { - NREALS = 5; - NINTS = 4; - NFLOATS = 4; - this_phi = species.phi; - } else { - NREALS = 4; - NINTS = 4; - NFLOATS = 4; - } - } - if constexpr (D == Dim::_3D) { - NREALS = 4; - NINTS = 6; - NFLOATS = 6; - } + Kokkos::View recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + Kokkos::View recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + Kokkos::View recv_buff_prtldx { "recv_buff_prtldx", + npart_recv * NPRTLDX }; - // Now make buffers to store recevied data (don't need global send buffers) - const auto total_send = permute_vector.extent(0) - npart_per_tag_arr[ParticleTag::dead]; - const auto total_recv = allocation_vector.extent(0); - const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; - const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; - - // Debug test: print send and recv count - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); - } - /* - Brief on recv buffers: Each recv buffer contains all the received arrays of - a given type. The different physical quantities are stored next to each other - to avoid cache misses. The array is structured as follows: - E.g., - recv_buffer_int: | qty1 | qty2 | ... | qtyNINTS | qty1 | qty2 | ... | qtyNINTS | ... 
- <-------particle to recv1------> <-------particle to recv2--------> - <----------------------------------total_recv----------------------------> - */ - Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); - Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); - Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); - Kokkos::View recv_buffer_long("recv_buffer_long", total_recv * NLONGS); - auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); - auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); - auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); - auto recv_buffer_long_h = Kokkos::create_mirror_view(recv_buffer_long); - - auto iteration = 0; + auto iteration = 0; auto current_received = 0; - for (const auto& direction : legal_directions) { - const auto send_rank = send_ranks[iteration]; - const auto recv_rank = recv_ranks[iteration]; - const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); - const auto send_count = npart_per_tag_arr[tag_send]; - const auto recv_count = npart_per_tag_arr_recv[tag_recv]; - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } + + for (const auto& direction : dirs_to_comm) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto npart_send_in = npptag_vec[tag_send]; + const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; if (send_rank < 0 and recv_rank < 0) { continue; } - Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); - Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); - Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); - Kokkos::View send_buffer_long("send_buffer_long", send_count * NLONGS); - 
auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); - auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); - auto send_buffer_prtldx_h = Kokkos::create_mirror_view(send_buffer_prtldx); - auto send_buffer_long_h = Kokkos::create_mirror_view(send_buffer_long); - - // Need different constexpr parallel fors for different dims - if constexpr(D == Dim::_1D) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_2D && C == Coord::Cart) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = 
this_dx2_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_2D && C != Coord::Cart) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_real(NREALS * p + 4) = this_phi(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_3D) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_int(NINTS * p + 4) = this_i3(idx); - send_buffer_int(NINTS * p + 5) = this_i3_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p 
+ 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); - send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); - send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - Kokkos::deep_copy(tag_offset_h, tag_offset); - /* - Brief on receive offset: - The receive buffer looks like this - <-----------------------------------> - |NINT|NINT|NINT|NINT|NINT|NINT|NINT|NINT|...xnrecv - <--------><--------><--------><--------> - recv1 recv2 recv3 recv4 - |________| - ^ ^ - offset offset + nrecv - */ - const auto receive_offset_int = current_received * NINTS; - const auto receive_offset_real = current_received * NREALS; - const auto receive_offset_prtldx = current_received * NFLOATS; - const auto receive_offset_long = current_received * NLONGS; - // Comms - // Make host arrays for send and recv buffers - Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); - Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); - Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); - Kokkos::deep_copy(send_buffer_long_h, send_buffer_long); - - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation, direction %d \n", rank, direction); - } - MPI_Sendrecv(send_buffer_int.data(), - send_count * NINTS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_int.data() + receive_offset_int, - recv_count*NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_real.data(), - 
send_count * NREALS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_real.data() + receive_offset_real, - recv_count*NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_prtldx.data(), - send_count * NFLOATS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_prtldx.data() + receive_offset_prtldx, - recv_count*NFLOATS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_long.data(), - send_count * NLONGS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_long.data() + receive_offset_long, - recv_count*NLONGS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation, direction %d \n", rank, direction); - } - MPI_Send(send_buffer_int.data(), - send_count * NINTS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_real.data(), - send_count * NREALS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_prtldx.data(), - send_count * NFLOATS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_long.data(), - send_count * NLONGS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing recv operation, direction %d \n", rank, direction); - } - MPI_Recv(recv_buffer_int.data() + receive_offset_int, - recv_count * NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_real.data() + receive_offset_real, - recv_count * NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - 
MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_prtldx.data() + receive_offset_prtldx, - recv_count * NFLOATS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_long.data() + receive_offset_long, - recv_count * NLONGS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - current_received += recv_count; - iteration++; - - // Debug test: Print recv buffer before and after - /* - { - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks send_buff_int { "send_buff_int", npart_send_in * NINTS }; + Kokkos::View send_buff_real { "send_buff_real", + npart_send_in * NREALS }; + Kokkos::View send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + Kokkos::parallel_for( + "PopulateSendBuffer", + npart_send_in, + Lambda(index_t p) { + const auto idx = outgoing_indices( + (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = this_i1(idx); + send_buff_int(NINTS * p + 1) = this_i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = this_dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = this_dx1_prev(idx); } - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - - for (int i=0; i= 0) and (recv_rank >= 0) and (npart_send_in > 0) and + (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Sendrecv(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buff_real.data() + recv_offset_real, + 
npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else if ((send_rank >= 0) and (npart_send_in > 0)) { + MPI_Send(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Recv(recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } - } - */ - - } // end over direction loop - /*Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); - Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); - Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h);*/ - if constexpr (D == Dim::_1D) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_ux1(idx) = recv_buffer_real(NREALS * 
p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + current_received += npart_recv_in; + iteration++; - if constexpr (D == Dim::_2D && C == Coord::Cart) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + } // end direction loop - if constexpr (D == Dim::_2D && C != Coord::Cart) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = 
recv_buffer_real(NREALS * p + 3); - this_phi(idx) = recv_buffer_real(NREALS * p + 4); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); - if constexpr (D == Dim::_3D) - { - Kokkos::parallel_for( + Kokkos::parallel_for( "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_i3(idx) = recv_buffer_int(NINTS * p + 4); - this_i3_prev(idx) = recv_buffer_int(NINTS * p + 5); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_dx3(idx) = recv_buffer_prtldx(NFLOATS * p + 4); - this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } - species.set_npart(species.npart() + std::max(permute_vector.extent(0), - allocation_vector.extent(0)) - permute_vector.extent(0)); - /* - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - // Print the total number of particles after each pass - int species_npart = species.npart(); - int global_species_npart = 0; - // Reduce all local sums into 
global_sum on rank 0 - MPI_Reduce(&species_npart, &global_species_npart, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks= npart_holes ? npart + p - npart_holes + : outgoing_indices(p)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + this_i1(idx) = recv_buff_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buff_int(NINTS * p + 1); + this_dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + this_dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); } - } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + this_i2(idx) = recv_buff_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buff_int(NINTS * p + 3); + this_dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + this_dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + this_i3(idx) = recv_buff_int(NINTS * p + 4); + this_i3_prev(idx) = recv_buff_int(NINTS * p + 5); + this_dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + this_dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + this_ux1(idx) = recv_buff_real(NREALS * p + 0); + this_ux2(idx) = recv_buff_real(NREALS * p + 1); + this_ux3(idx) = recv_buff_real(NREALS * p + 2); + this_weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + this_phi(idx) = recv_buff_real(NREALS * p + 4); + } + this_tag(idx) = ParticleTag::alive; + }); + + if (npart_recv > npart_holes) { + species.set_npart(npart + npart_recv - npart_holes); } - */ - return; -} + } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 390c27fa8..6175cc4bb 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -24,6 +24,8 @@ #include "framework/domain/comm_nompi.hpp" #endif +#include + #include #include @@ -33,10 +35,10 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks(Metadomain* 
metadomain, - Domain& domain, - dir::direction_t direction) - -> std::pair { + auto GetSendRecvRanks( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction) -> std::pair { Domain* send_to_nghbr_ptr = nullptr; Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains @@ -86,8 +88,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,17 +112,17 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } template - auto GetSendRecvParams(Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) - -> std::pair { + auto GetSendRecvParams( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; @@ -129,8 +131,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +198,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -492,638 +494,177 @@ namespace ntt { } template - void Metadomain::CommunicateParticles(Domain& domain, - timer::Timers* timers) { - raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); - logger::Checkpoint("Communicating particles\n", HERE); - for (auto& species : domain.species) { - // at this point particles should already by tagged in the pusher - timers->start("Sorting"); - const auto npart_per_tag = 
species.SortByTags(); - timers->stop("Sorting"); + void Metadomain::CommunicateParticles(Domain& domain) { #if defined(MPI_ENABLED) - timers->start("Communications"); - // only necessary when MPI is enabled - /** - * index_last - * | - * alive new dead tag1 tag2 v dead - * [ 11111111 000000000 222222222 3333333 .... nnnnnnn 00000000 ... ] - * ^ ^ - * | | - * tag_offset[tag1] -----+ +----- tag_offset[tag1] + npart_per_tag[tag1] - * "send_pmin" "send_pmax" (after last element) - */ - auto tag_offset { npart_per_tag }; - for (std::size_t i { 1 }; i < tag_offset.size(); ++i) { - tag_offset[i] += tag_offset[i - 1]; - } - for (std::size_t i { 0 }; i < tag_offset.size(); ++i) { - tag_offset[i] -= npart_per_tag[i]; - } - auto index_last = tag_offset[tag_offset.size() - 1] + - npart_per_tag[npart_per_tag.size() - 1]; - for (auto& direction : dir::Directions::all) { - const auto [send_params, - recv_params] = GetSendRecvParams(this, domain, direction, true); - const auto [send_indrank, send_slice] = send_params; - const auto [recv_indrank, recv_slice] = recv_params; - const auto [send_ind, send_rank] = send_indrank; - const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { - continue; - } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag[send_dir_tag]; - const auto send_pmin = tag_offset[send_dir_tag]; - const auto send_pmax = tag_offset[send_dir_tag] + nsend; - const auto recv_count = comm::CommunicateParticles( - species, - send_rank, - recv_rank, - { send_pmin, send_pmax }, - index_last); - if (recv_count > 0) { - if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - Kokkos::parallel_for( - 
"CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i1_prev(index_last + p) += shift_in_x1; - }); - } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - }); - } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); - } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = 
species.i3_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i3(index_last + p) += shift_in_x3; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - this_i3_prev(index_last + p) += shift_in_x3; - }); - } - index_last += recv_count; - species.set_npart(index_last); - } - Kokkos::deep_copy( - Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), - ParticleTag::dead); - } - timers->stop("Communications"); - // !TODO: maybe there is a way to not sort twice - timers->start("Sorting"); - species.set_unsorted(); - species.SortByTags(); - timers->stop("Sorting"); -#endif - } - } - - /* - New function to communicate particles using a buffer - */ - template - void Metadomain::CommunicateParticlesBuffer(Domain& domain, - timer::Timers* timers) { - raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - /* - Brief on arrays - npart_per_tag_arr (vector): | dead count| alive count | tag=1 count | tag=2 count | ... - <--------------------------size = ntags()--------------------------> - tag_offset (Kokkos::View): | 0 | dead count | dead + alive count | dead + alive + tag=1 count | ... - <--------------------------size = ntags()--------------------------> - npart_per_tag_arr_recv (vector): | 0 | 0 | nrecv1 | nrecv2 | ... 
- <--------------------------size = ntags()--------------------------> - */ - auto [npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_recv = static_cast(0); - + const auto ntags = species.ntags(); + + // at this point particles should already be tagged in the pusher + auto [npptag_vec, tag_offsets] = species.NpartsPerTagAndOffsets(); + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_alive = npptag_vec[ParticleTag::alive]; + + const auto npart = species.npart(); + const auto npart_holes = npart - npart_alive; + + // # of particles to receive per each tag (direction) + std::vector npptag_recv_vec(ntags - 2, 0); + // coordinate shifts per each direction + array_t shifts_in_x1("shifts_in_x1", ntags - 2); + array_t shifts_in_x2("shifts_in_x2", ntags - 2); + array_t shifts_in_x3("shifts_in_x3", ntags - 2); + // all directions requiring communication + dir::dirs_t dirs_to_comm; + + // ranks & indices of meshblock to send/recv from std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; - // at this point particles should already by tagged in the pusher -#if defined(MPI_ENABLED) - std::vector npart_per_tag_arr_recv(species.ntags(), 0); - Kokkos::View shifts_in_x1("shifts_in_x1", species.ntags()); - Kokkos::View shifts_in_x2("shifts_in_x2", species.ntags()); - Kokkos::View shifts_in_x3("shifts_in_x3", species.ntags()); - auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); - auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); - auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); - dir::dirs_t legal_directions; - // Get receive counts + displacements - for (auto& direction : dir::Directions::all) { + // total # of reaceived particles 
from all directions + std::size_t npart_recv = 0u; + + for (const auto& direction : dir::Directions::all) { + // tags corresponding to the direction (both send & recv) const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + + // get indices & ranks of send/recv meshblocks const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { + + // skip if no communication is necessary + const auto is_sending = (send_rank >= 0); + const auto is_receiving = (recv_rank >= 0); + if (not is_sending and not is_receiving) { continue; } - const auto nsend = npart_per_tag_arr[tag_send]; - std::size_t nrecv = 0; - - legal_directions.push_back(direction); + dirs_to_comm.push_back(direction); send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); - comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); - total_recv += nrecv; - npart_per_tag_arr_recv[tag_recv] = nrecv; - // Perform displacements before sending - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_recv) = -domain.mesh.n_active(in::x1); - } - } - if constexpr (D == Dim::_2D || D == Dim::_3D) { - if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_recv) = -domain.mesh.n_active(in::x2); - } - } - if constexpr (D == Dim::_3D) { - if ((-direction)[2] == -1) { - shifts_in_x3_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x3); - } 
else if ((-direction)[2] == 1) { - shifts_in_x3_h(tag_recv) = -domain.mesh.n_active(in::x3); - } - } - } // end directions loop - Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); - Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); - Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); - - raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; + // record the # of particles to-be-sent + const auto nsend = npptag_vec[tag_send]; - /* - Brief on permute vector: It contains the sorted indices of tag != alive particles - E.g., consider the following tag array - species.tag = [ 0, 0, 1, 0, 2, 3,...] - Then, permute vector will look something like - permute_vector = [0, 1, 3, ..., 4, ..., ... 5, ... ] - |<--------- >| |<----->| |<----->| .... - tag=dead ct tag=2 ct tag=3 ct - */ - Kokkos::View permute_vector("permute_vector", total_holes); - Kokkos::View current_offset("current_offset", species.ntags()); - auto &this_tag_offset = tag_offset; + // request the # of particles to-be-received ... + // ... 
and send the # of particles to-be-sent + std::size_t nrecv = 0; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + npart_recv += nrecv; + npptag_recv_vec[tag_recv - 2] = nrecv; - auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + raise::ErrorIf((npart + npart_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); - if constexpr (D == Dim::_1D){ - Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); + // if sending, record displacements to apply before + // ... tag_send - 2: because we only shift tags > 2 (i.e. 
no dead/alive) + if (is_sending) { + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + if (direction[0] == -1) { + // sending backwards in x1 (add sx1 of target meshblock) + shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x1); + } else if (direction[0] == 1) { + // sending forward in x1 (subtract sx1 of source meshblock) + shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); } - }); - } - - if constexpr (D == Dim::_2D){ - Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; + if constexpr (D == Dim::_2D || D == Dim::_3D) { + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + if (direction[1] == -1) { + shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x2); + } else if (direction[1] == 1) { + shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + } + if constexpr (D == Dim::_3D) { + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + if (direction[2] == -1) { + shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x3); + } else if (direction[2] == 1) { + 
shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); } - }); - } + } + } // end directions loop - if constexpr (D == Dim::_3D){ + auto& this_tag = species.tag; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + + array_t outgoing_indices("outgoing_indices", + npart - npart_alive); + + array_t current_offset("current_offset", ntags); Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), + "OutgoingIndicesAndDisplace", + species.rangeActiveParticles(), Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; + if (this_tag(p) != ParticleTag::alive) { + // dead or to-be-sent + const auto idx_for_tag = + Kokkos::atomic_fetch_add(¤t_offset(this_tag(p)), 1) + + (this_tag(p) != ParticleTag::dead ? npart_dead : 0) + + (this_tag(p) > 2 ? 
tag_offsets(this_tag(p) - 3) : 0); + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, + "Outgoing indices idx exceeds the array size"); } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); - this_i3(p) += shifts_in_x3(current_tag); - this_i3_prev(p) += shifts_in_x3(current_tag); + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (this_tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + this_i1(p) += shifts_in_x1(this_tag(p) - 2); + this_i1_prev(p) += shifts_in_x1(this_tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + this_i2(p) += shifts_in_x2(this_tag(p) - 2); + this_i2_prev(p) += shifts_in_x2(this_tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + this_i3(p) += shifts_in_x3(this_tag(p) - 2); + this_i3_prev(p) += shifts_in_x3(this_tag(p) - 2); + } } } }); - } - - - - // Sanity check: npart_per_tag must be equal to the current offset except tag=alive - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < species.ntags(); ++i) { - if (i != ParticleTag::alive){ - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } - else{ - raise::FatalIf(current_offset_h(i) != 0, - "Error in permute vector construction", - HERE); - } - } - - /* - Brief on allocation vector: It contains the indices of holes that are filled - by the particles received from other domains - case 1: total_recv > nholes - allocation_vector = | i1 | i2 | i3 | .... | npart | npart + 1 | ... 
- <-------total_holes------> <---total_recv - nholes--> - (same as permuute vector) (extra particles appended at end) - case 2: total_recv <= nholes - allocation_vector = | i1 | i2 | i3 | .... - <----total_recv-----> - (same as permuute vector) - */ - Kokkos::View allocation_vector("allocation_vector", total_recv); - if (total_recv > total_holes) - { - // Fill the first bit with the permute vector; these are the holes to be filled - Kokkos::parallel_for( - "AllocationVector", - total_holes, - Lambda(index_t p) { - allocation_vector(p) = permute_vector(p); - }); - - // Now allocate the rest to the end of the array - Kokkos::parallel_for( - "AllocationVector", - total_recv - total_holes, - Lambda(index_t p) { - allocation_vector(total_holes + p) = static_cast(npart + p); - }); - } - else - { Kokkos::parallel_for( - "AllocationVector", - total_recv, - Lambda(index_t p) { - allocation_vector(p) = permute_vector(p); - }); - } - - /* - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank == 1 && species.label() == "e+_b") - { - // Copy the tag array to host - auto tag_h = Kokkos::create_mirror_view(species.tag); - Kokkos::deep_copy(tag_h, species.tag); - std::cout << "Tag locs before send" << std::endl; - for (std::size_t i { 0 }; i < species.npart(); i++) { - if (tag_h(i) != ParticleTag::alive) - std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; - } - - // Print allocation vector after copying to host - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - for (std::size_t i { 0 }; i < total_recv; ++i) { - std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; - } - // Print the permute vector as well - auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); - Kokkos::deep_copy(permute_vector_h, permute_vector); - for 
(std::size_t i { 0 }; i < total_holes; ++i) { - std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << - " tag: " << tag_h(permute_vector_h(i)) << std::endl; - } - } - */ - - // Communicate the arrays - comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, - this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks, legal_directions); + comm::CommunicateParticles(species, + outgoing_indices, + tag_offsets, + npptag_vec, + npptag_recv_vec, + send_ranks, + recv_ranks, + dirs_to_comm); + species.set_unsorted(); + } // end species loop +#else + (void)domain; #endif - } } - /* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - return; - } - - /* - Function to remove dead particles from the domain - - Consider the following particle quantity array - <---xxx---x---xx---xx-----------xx----x--> (qty) - - = alive - x = dead - ntot = nalive + ndead - - (1) Copy all alive particle data to buffer - <---xxx---x---xx---xx-----------xx----x--> (qty) - | - | - v - <--------------------------> buffer - (nalive) - - (2) Copy from buffer to the beginning of the array - overwritting all particles - <--------------------------> buffer - (nalive) - | - | - v - <--------------------------xx----x--> (qty) - ^ - (nalive) - - (3) Set npart to nalive - */ template - void Metadomain::RemoveDeadParticles(Domain& domain, - timer::Timers* timers){ + void Metadomain::RemoveDeadParticles(Domain& domain) { for (auto& species : domain.species) { - auto 
[npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - const auto npart = static_cast(species.npart()); - const auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - const auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } - - // Get the indices of all alive particles - auto &this_i1 = species.i1; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i1_prev = species.i1_prev; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_tag = species.tag; - // Find indices of tag = alive particles - Kokkos::View indices_alive("indices_alive", total_alive); - Kokkos::View alive_counter("counter_alive", 1); - Kokkos::deep_copy(alive_counter, 0); - Kokkos::parallel_for( - "Indices of Alive Particles", - species.npart(), - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); - indices_alive(idx) = p; - } - }); - // Sanity check: alive_counter must be equal to total_alive - auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); - Kokkos::deep_copy(alive_counter_h, alive_counter); - raise::FatalIf(alive_counter_h(0) != total_alive, - "Error in finding alive particles", - HERE); - - MoveDeadToEnd(species.i1, 
indices_alive); - MoveDeadToEnd(species.dx1, indices_alive); - MoveDeadToEnd(species.dx1_prev, indices_alive); - MoveDeadToEnd(species.ux1, indices_alive); - MoveDeadToEnd(species.ux2, indices_alive); - MoveDeadToEnd(species.ux3, indices_alive); - MoveDeadToEnd(species.weight, indices_alive); - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - MoveDeadToEnd(species.i2, indices_alive); - MoveDeadToEnd(species.i2_prev, indices_alive); - MoveDeadToEnd(species.dx2, indices_alive); - MoveDeadToEnd(species.dx2_prev, indices_alive); - if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - MoveDeadToEnd(species.phi, indices_alive); - } - } - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - MoveDeadToEnd(species.i3, indices_alive); - MoveDeadToEnd(species.i3_prev, indices_alive); - MoveDeadToEnd(species.dx3, indices_alive); - MoveDeadToEnd(species.dx3_prev, indices_alive); - } - // tags (set first total_alive to alive and rest to dead) - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - - Kokkos::parallel_for( - "Make tags dead", - total_dead, - Lambda(index_t p) { - this_tag(total_alive + p) = ParticleTag::dead; - }); - - species.set_npart(total_alive); - - std::tie(npart_per_tag_arr, - tag_offset) = species.npart_per_tag(); - raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, - "Error in removing dead particles: alive count doesn't match", - HERE); - raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, - "Error in removing dead particles: not all particles are dead", - HERE); - + species.RemoveDead(); } - - return; } template struct Metadomain>; diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 397907fef..bc7c6e4b5 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -65,7 +65,7 @@ namespace ntt { Mesh mesh; Fields fields; std::vector> species; - 
random_number_pool_t random_pool { constant::RandomSeed }; + random_number_pool_t random_pool; /** * @brief constructor for "empty" allocation of non-local domain placeholders @@ -81,6 +81,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields {} , species {} + , random_pool { constant::RandomSeed } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -95,6 +96,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields { ncells } , species { species_params.begin(), species_params.end() } + , random_pool { constant::RandomSeed + static_cast(index) } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -144,8 +146,7 @@ namespace ntt { } /* setters -------------------------------------------------------------- */ - auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) - -> void { + auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) -> void { m_neighbor_idx[dir] = idx; } @@ -163,8 +164,8 @@ namespace ntt { }; template - inline auto operator<<(std::ostream& os, const Domain& domain) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const Domain& domain) -> std::ostream& { os << "Domain #" << domain.index(); #if defined(MPI_ENABLED) os << " [MPI rank: " << domain.mpi_rank() << "]"; diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index a01296823..ec8561a9a 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -399,33 +399,6 @@ namespace ntt { #endif } - // Function to assign a unique ID to each particle - template - void Metadomain::SetParticleIDs(Domain& domain){ - for (auto& species : domain.species) { - auto &this_particleID = species.particleID; - auto &this_tag = species.tag; - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - const auto offset_per_rank = static_cast(1e9 * rank); - std::size_t current_particleID = 
0; - Kokkos::View counter_view("current_particleID", 1); - Kokkos::deep_copy(counter_view, current_particleID); - - Kokkos::parallel_for( - "Set Particle IDs", - species.npart(), - Lambda(const std::size_t p){ - if (this_tag(p) == ParticleTag::alive) - { - Kokkos::atomic_increment(&counter_view(0)); - this_particleID(p) = offset_per_rank + static_cast(counter_view(0)); - } - }); - } - return; - } - template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e2c2bb9d..5177571d0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -88,10 +88,8 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); - void CommunicateParticles(Domain&, timer::Timers*); - void CommunicateParticlesBuffer(Domain&, timer::Timers*); - void SetParticleIDs(Domain&); - void RemoveDeadParticles(Domain& ,timer::Timers* ); + void CommunicateParticles(Domain&); + void RemoveDeadParticles(Domain&); /** * @param global_ndomains total number of domains diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 4a6b2c908..c39f0c67f 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -107,7 +107,6 @@ namespace ntt { } } - template void ComputeMoments(const SimulationParams& params, const Mesh& mesh, @@ -475,16 +474,14 @@ namespace ntt { for (const auto& prtl : g_writer.speciesWriters()) { auto& species = local_domain->species[prtl.species() - 1]; if (not species.is_sorted()) { - species.SortByTags(); + species.RemoveDead(); } const std::size_t nout = species.npart() / prtl_stride; array_t buff_x1, buff_x2, buff_x3; - array_t buff_ux1, buff_ux2, buff_ux3; - array_t buff_wei; - buff_wei = array_t { "w", nout }; - buff_ux1 = array_t { "u1", nout }; - buff_ux2 = array_t { "u2", nout }; - buff_ux3 = array_t { "u3", 
nout }; + array_t buff_ux1 { "u1", nout }; + array_t buff_ux2 { "ux2", nout }; + array_t buff_ux3 { "ux3", nout }; + array_t buff_wei { "w", nout }; if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { buff_x1 = array_t { "x1", nout }; diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index b667b5ac9..af7a773ed 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -445,15 +445,8 @@ namespace ntt { defaults::gr::pusher_niter)); } /* [particles] ---------------------------------------------------------- */ -#if defined(MPI_ENABLED) - const std::size_t sort_interval = 1; -#else - const std::size_t sort_interval = toml::find_or(toml_data, - "particles", - "sort_interval", - defaults::sort_interval); -#endif - set("particles.sort_interval", sort_interval); + set("particles.clear_interval", + toml::find_or(toml_data, "particles", "clear_interval", defaults::clear_interval)); /* [output] ------------------------------------------------------------- */ // fields diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 393cd2409..1a4228642 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -48,7 +48,7 @@ const auto mink_1d = u8R"( [particles] ppc0 = 10.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -134,7 +134,7 @@ const auto sph_2d = u8R"( [particles] ppc0 = 25.0 use_weights = true - sort_interval = 50 + clear_interval = 50 [[particles.species]] @@ -199,7 +199,7 @@ const auto qks_2d = u8R"( 
[particles] ppc0 = 4.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -269,7 +269,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)0.0078125, "scales.V0"); boundaries_t fbc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC} + { FldsBC::PERIODIC, FldsBC::PERIODIC } }; assert_equal( params_mink_1d.get>("grid.boundaries.fields")[0].first, @@ -345,8 +345,8 @@ auto main(int argc, char* argv[]) -> int { "simulation.engine"); boundaries_t fbc = { - {FldsBC::ATMOSPHERE, FldsBC::ABSORB}, - { FldsBC::AXIS, FldsBC::AXIS} + { FldsBC::ATMOSPHERE, FldsBC::ABSORB }, + { FldsBC::AXIS, FldsBC::AXIS } }; assert_equal(params_sph_2d.get("scales.B0"), @@ -480,9 +480,9 @@ auto main(int argc, char* argv[]) -> int { "grid.metric.ks_rh"); const auto expect = std::map { - {"r0", 0.0}, - { "h", 0.25}, - { "a", 0.99} + { "r0", 0.0 }, + { "h", 0.25 }, + { "a", 0.99 } }; auto read = params_qks_2d.get>( "grid.metric.params"); @@ -501,8 +501,8 @@ auto main(int argc, char* argv[]) -> int { "algorithms.gr.pusher_niter"); boundaries_t pbc = { - {PrtlBC::HORIZON, PrtlBC::ABSORB}, - { PrtlBC::AXIS, PrtlBC::AXIS} + { PrtlBC::HORIZON, PrtlBC::ABSORB }, + { PrtlBC::AXIS, PrtlBC::AXIS } }; assert_equal(params_qks_2d.get("scales.B0"), @@ -579,86 +579,3 @@ auto main(int argc, char* argv[]) -> int { return 0; } - -// const auto mink_1d = R"( -// [simulation] -// name = "" -// engine = "" -// runtime = "" - -// [grid] -// resolution = "" -// extent = "" - -// [grid.metric] -// metric = "" -// qsph_r0 = "" -// qsph_h = "" -// ks_a = "" - -// [grid.boundaries] -// fields = "" -// particles = "" -// absorb_d = "" -// absorb_coeff = "" - -// [scales] -// larmor0 = "" -// skindepth0 = "" - -// [algorithms] -// current_filters = "" - -// [algorithms.toggles] -// fieldsolver = "" -// deposit = "" - -// [algorithms.timestep] -// CFL = "" -// correction = "" - -// [algorithms.gr] -// pusher_eps = "" -// pusher_niter = "" - -// [algorithms.gca] -// e_ovr_b_max = "" -// larmor_max = "" 
- -// [algorithms.synchrotron] -// gamma_rad = "" - -// [particles] -// ppc0 = "" -// use_weights = "" -// sort_interval = "" - -// [[particles.species]] -// label = "" -// mass = "" -// charge = "" -// maxnpart = "" -// pusher = "" -// n_payloads = "" -// cooling = "" -// [setup] - -// [output] -// fields = "" -// particles = "" -// format = "" -// mom_smooth = "" -// fields_stride = "" -// prtl_stride = "" -// interval = "" -// interval_time = "" - -// [output.debug] -// as_is = "" -// ghosts = "" - -// [diagnostics] -// interval = "" -// log_level = "" -// blocking_timers = "" -// )"_toml; diff --git a/src/global/arch/directions.h b/src/global/arch/directions.h index 19cf182d6..ccd4e67b0 100644 --- a/src/global/arch/directions.h +++ b/src/global/arch/directions.h @@ -132,8 +132,8 @@ namespace dir { using dirs_t = std::vector>; template - inline auto operator<<(std::ostream& os, const direction_t& dir) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const direction_t& dir) -> std::ostream& { for (auto& d : dir) { os << std::setw(2) << std::left; if (d > 0) { @@ -175,81 +175,81 @@ namespace dir { template <> struct Directions { inline static const dirs_t all = { - {-1, -1}, - {-1, 0}, - {-1, 1}, - { 0, -1}, - { 0, 1}, - { 1, -1}, - { 1, 0}, - { 1, 1} + { -1, -1 }, + { -1, 0 }, + { -1, 1 }, + { 0, -1 }, + { 0, 1 }, + { 1, -1 }, + { 1, 0 }, + { 1, 1 } }; inline static const dirs_t orth = { - {-1, 0}, - { 0, -1}, - { 0, 1}, - { 1, 0} + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 } }; inline static const dirs_t unique = { - { 0, 1}, - { 1, 1}, - { 1, 0}, - {-1, 1} + { 0, 1 }, + { 1, 1 }, + { 1, 0 }, + { -1, 1 } }; }; template <> struct Directions { inline static const dirs_t all = { - {-1, -1, -1}, - {-1, -1, 0}, - {-1, -1, 1}, - {-1, 0, -1}, - {-1, 0, 0}, - {-1, 0, 1}, - {-1, 1, -1}, - {-1, 1, 0}, - {-1, 1, 1}, - { 0, -1, -1}, - { 0, -1, 0}, - { 0, -1, 1}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, -1}, - { 0, 1, 0}, - { 0, 1, 1}, - { 1, -1, -1}, - { 
1, -1, 0}, - { 1, -1, 1}, - { 1, 0, -1}, - { 1, 0, 0}, - { 1, 0, 1}, - { 1, 1, -1}, - { 1, 1, 0}, - { 1, 1, 1} + { -1, -1, -1 }, + { -1, -1, 0 }, + { -1, -1, 1 }, + { -1, 0, -1 }, + { -1, 0, 0 }, + { -1, 0, 1 }, + { -1, 1, -1 }, + { -1, 1, 0 }, + { -1, 1, 1 }, + { 0, -1, -1 }, + { 0, -1, 0 }, + { 0, -1, 1 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, -1 }, + { 0, 1, 0 }, + { 0, 1, 1 }, + { 1, -1, -1 }, + { 1, -1, 0 }, + { 1, -1, 1 }, + { 1, 0, -1 }, + { 1, 0, 0 }, + { 1, 0, 1 }, + { 1, 1, -1 }, + { 1, 1, 0 }, + { 1, 1, 1 } }; inline static const dirs_t orth = { - {-1, 0, 0}, - { 0, -1, 0}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0} + { -1, 0, 0 }, + { 0, -1, 0 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 } }; inline static const dirs_t unique = { - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0}, - { 1, 1, 0}, - {-1, 1, 0}, - { 0, 1, 1}, - { 0, -1, 1}, - { 1, 0, 1}, - {-1, 0, 1}, - { 1, 1, 1}, - {-1, 1, 1}, - { 1, -1, 1}, - { 1, 1, -1} + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 }, + { 1, 1, 0 }, + { -1, 1, 0 }, + { 0, 1, 1 }, + { 0, -1, 1 }, + { 1, 0, 1 }, + { -1, 0, 1 }, + { 1, 1, 1 }, + { -1, 1, 1 }, + { 1, -1, 1 }, + { 1, 1, -1 } }; }; diff --git a/src/global/arch/kokkos_aliases.cpp b/src/global/arch/kokkos_aliases.cpp index 4311a40bd..6c15e3d52 100644 --- a/src/global/arch/kokkos_aliases.cpp +++ b/src/global/arch/kokkos_aliases.cpp @@ -5,18 +5,18 @@ #include template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -26,9 +26,9 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto 
CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -41,18 +41,18 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -62,9 +62,9 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -76,11 +76,11 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, { i1max, i2max, i3max }); } -// auto WaitAndSynchronize(bool debug_only) -> void { -// if (debug_only) { -// #ifndef DEBUG -// return; -// #endif -// } -// Kokkos::fence(); -// } \ No newline at end of file +auto WaitAndSynchronize(bool debug_only) -> void { + if (debug_only) { +#ifndef DEBUG + return; +#endif + } + Kokkos::fence(); +} diff --git a/src/global/defaults.h b/src/global/defaults.h index be92acbf9..b7b0107e7 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -22,9 +22,9 @@ namespace ntt::defaults { const unsigned short current_filters = 0; - const std::string em_pusher = "Boris"; - const std::string ph_pusher = "Photon"; - const std::size_t sort_interval = 100; + const std::string em_pusher = "Boris"; + const std::string ph_pusher = "Photon"; + const 
std::size_t clear_interval = 100; namespace qsph { const real_t r0 = 0.0; @@ -45,7 +45,7 @@ namespace ntt::defaults { const real_t ds_frac = 0.01; const real_t coeff = 1.0; } // namespace absorb - } // namespace bc + } // namespace bc namespace output { const std::string format = "hdf5"; diff --git a/src/global/global.cpp b/src/global/global.cpp index 434740446..ec22fd2f3 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -9,17 +9,7 @@ void ntt::GlobalInitialize(int argc, char* argv[]) { Kokkos::initialize(argc, argv); #if defined(MPI_ENABLED) - int required = MPI_THREAD_MULTIPLE; - int provided; - MPI_Init_thread(&argc, - &argv, - required, - &provided); - if (provided != required) { - std::cerr << "MPI_Init_thread() did not provide the requested threading support." << std::endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - //MPI_Init(&argc, &argv); + MPI_Init(&argc, &argv); #endif // MPI_ENABLED } diff --git a/src/global/global.h b/src/global/global.h index ad524fb0e..dad6afccc 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -209,7 +209,7 @@ namespace Timer { PrintTitle = 1 << 1, AutoConvert = 1 << 2, PrintOutput = 1 << 3, - PrintSorting = 1 << 4, + PrintPrtlClear = 1 << 4, PrintCheckpoint = 1 << 5, PrintNormed = 1 << 6, Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 0a499dd56..c053cdacf 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -21,8 +21,9 @@ #include namespace diag { - auto npart_stats(std::size_t npart, std::size_t maxnpart) - -> std::vector> { + auto npart_stats( + std::size_t npart, + std::size_t maxnpart) -> std::vector> { auto stats = std::vector>(); #if !defined(MPI_ENABLED) stats.push_back( @@ -84,7 +85,7 @@ namespace diag { const std::vector& species_labels, const std::vector& species_npart, const std::vector& species_maxnpart, - bool print_sorting, + bool print_prtl_clear, bool print_output, bool 
print_checkpoint, bool print_colors) { @@ -96,8 +97,8 @@ namespace diag { if (species_labels.size() == 0) { diag_flags ^= Diag::Species; } - if (print_sorting) { - timer_flags |= Timer::PrintSorting; + if (print_prtl_clear) { + timer_flags |= Timer::PrintPrtlClear; } if (print_output) { timer_flags |= Timer::PrintOutput; diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 9951602f8..30cca5705 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -34,9 +34,9 @@ namespace diag { * @param species_labels (vector of particle labels) * @param npart (per each species) * @param maxnpart (per each species) - * @param sorting_step (if true, particles were sorted) - * @param output_step (if true, output was written) - * @param checkpoint_step (if true, checkpoint was written) + * @param prtlclear (if true, dead particles were removed) + * @param output (if true, output was written) + * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) */ void printDiagnostics(std::size_t, diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index b5f4408ca..7d5a9bebd 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -127,10 +127,11 @@ namespace timer { return timer_stats; } - auto Timers::printAll(TimerFlags flags, std::size_t npart, std::size_t ncells) const - -> std::string { - const std::vector extras { "Sorting", "Output", "Checkpoint" }; - const auto stats = gather(extras, npart, ncells); + auto Timers::printAll(TimerFlags flags, + std::size_t npart, + std::size_t ncells) const -> std::string { + const std::vector extras { "PrtlClear", "Output", "Checkpoint" }; + const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; } @@ -253,8 +254,8 @@ namespace timer { } } - // print extra timers for output/checkpoint/sorting - const std::vector extras_f { Timer::PrintSorting, + // print extra timers for output/checkpoint/prtlClear + const 
std::vector extras_f { Timer::PrintPrtlClear, Timer::PrintOutput, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 0deb73c6f..b4808f12a 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -90,7 +90,7 @@ namespace kernel::sr { Force(const F& pgen_force) : Force { pgen_force, - {ZERO, ZERO, ZERO}, + { ZERO, ZERO, ZERO }, ZERO, ZERO } { From 541633ead6dfde4444836c901059e9b0221e2e0a Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:21:53 -0500 Subject: [PATCH 082/124] toml schema --- .taplo.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .taplo.toml diff --git a/.taplo.toml b/.taplo.toml new file mode 100644 index 000000000..423a47594 --- /dev/null +++ b/.taplo.toml @@ -0,0 +1,6 @@ +[formatting] + align_entries = true + indent_tables = true + indent_entries = true + trailing_newline = true + align_comments = true From 9d0c8dbd5f85983d5ef05b970d99ae9742cd4f66 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:22:41 -0500 Subject: [PATCH 083/124] nix shells --- dev/nix/adios2.nix | 61 ++++++++++++++++++++++++++++++++++++++++++++++ dev/nix/shell.nix | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 dev/nix/adios2.nix create mode 100644 dev/nix/shell.nix diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix new file mode 100644 index 000000000..19c706aa4 --- /dev/null +++ b/dev/nix/adios2.nix @@ -0,0 +1,61 @@ +{ + pkgs ? import { }, + hdf5 ? false, + mpi ? 
false, +}: + +let + name = "adios2"; + version = "2.10.2"; +in +pkgs.stdenv.mkDerivation { + pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/ornladios/ADIOS2/"; + rev = "v${version}"; + sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; + }; + + nativeBuildInputs = + with pkgs; + [ + cmake + libgcc + perl + breakpointHook + ] + ++ (if mpi then [ openmpi ] else [ ]); + + buildInputs = if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]; + + configurePhase = '' + cmake -B build $src \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_BUILD_TYPE=Release + ''; + + buildPhase = '' + cmake --build build -j + ''; + + installPhase = '' + sed -i '/if(CMAKE_INSTALL_COMPONENT/,/^[[:space:]]&endif()$/d' build/cmake/install/post/cmake_install.cmake + cmake --install build --prefix $out + chmod +x build/cmake/install/post/generate-adios2-config.sh + sh build/cmake/install/post/generate-adios2-config.sh $out + ''; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix new file mode 100644 index 000000000..22358a837 --- /dev/null +++ b/dev/nix/shell.nix @@ -0,0 +1,56 @@ +{ + pkgs ? import { }, + mpi ? false, + hdf5 ? 
false, +}: + +let + name = "entity-dev"; + compilerPkg = pkgs.gcc13; + compilerCXX = "g++"; + compilerCC = "gcc"; + adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); +in +pkgs.mkShell { + name = "${name}-env"; + nativeBuildInputs = + with pkgs; + [ + zlib + cmake + + compilerPkg + + clang-tools + + adios2Pkg + python312 + python312Packages.jupyter + + cmake-format + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ] + ++ (if mpi then [ pkgs.openmpi ] else [ ]) + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ + pkgs.clang19Stdenv.cc.cc + pkgs.zlib + ]); + + shellHook = '' + BLUE='\033[0;34m' + NC='\033[0m' + export CC=$(which ${compilerCC}) + export CXX=$(which ${compilerCXX}) + export CMAKE_CXX_COMPILER=$(which ${compilerCXX}) + export CMAKE_C_COMPILER=$(which ${compilerCC}) + + echo "" + echo -e "${name} nix-shell activated: ''\${BLUE}$(which ${compilerCXX})''\${NC}" + ''; +} From 5d1d8f71bcc24b7a48bfb1d8928b6355be027b3c Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:23:57 -0500 Subject: [PATCH 084/124] comment --- src/framework/domain/comm_mpi.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 370c02b18..eb77ecbb3 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -345,6 +345,8 @@ namespace comm { auto& this_weight = species.weight; auto& this_tag = species.tag; + // @TODO_1.2.0: communicate payloads + // number of arrays of each type to send/recv const unsigned short NREALS = 4 + static_cast( D == Dim::_2D and C != Coord::Cart); From 179928f6f99a5bcb4c4aa1cd37795a9e66c9a6cb Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 09:12:58 -0500 Subject: [PATCH 085/124] plds are now 2d array --- src/checkpoint/reader.cpp | 38 ++++-- src/checkpoint/reader.h | 8 ++ src/checkpoint/tests/CMakeLists.txt | 2 
+- src/checkpoint/tests/checkpoint-mpi.cpp | 149 ++++++++++++++++-------- src/checkpoint/writer.cpp | 33 +++++- src/checkpoint/writer.h | 8 ++ src/framework/containers/particles.cpp | 102 +++++----------- src/framework/containers/particles.h | 35 ++---- src/framework/domain/checkpoint.cpp | 32 ++--- src/framework/domain/comm_mpi.hpp | 1 + src/framework/tests/particles.cpp | 8 +- src/global/arch/mpi_tags.h | 13 ++- 12 files changed, 250 insertions(+), 179 deletions(-) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index e89b7d384..208972561 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -35,16 +35,17 @@ namespace checkpoint { reader.Get(field_var, array_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Field variable: %s not found", field.c_str()), HERE); + raise::Error(fmt::format("Field variable: %s not found", field.c_str()), + HERE); } } - auto ReadParticleCount(adios2::IO& io, - adios2::Engine& reader, - unsigned short s, - std::size_t local_dom, - std::size_t ndomains) - -> std::pair { + auto ReadParticleCount( + adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + std::size_t local_dom, + std::size_t ndomains) -> std::pair { logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); auto npart_var = io.InquireVariable( fmt::format("s%d_npart", s + 1)); @@ -109,6 +110,29 @@ namespace checkpoint { } } + void ReadParticlePayloads(adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + array_t& array, + std::size_t nplds, + std::size_t count, + std::size_t offset) { + logger::Checkpoint(fmt::format("Reading quantity: s%d_plds", s + 1), HERE); + auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); + if (var) { + var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); + const auto slice = std::pair { 0, count }; + auto array_h = Kokkos::create_mirror_view(array); + reader.Get(var, + Kokkos::subview(array_h, 
slice, range_tuple_t(0, nplds)).data(), + adios2::Mode::Sync); + Kokkos::deep_copy(Kokkos::subview(array, slice, range_tuple_t(0, nplds)), + Kokkos::subview(array_h, slice, range_tuple_t(0, nplds))); + } else { + raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); + } + } + template void ReadFields(adios2::IO&, adios2::Engine&, const std::string&, diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 2ea11bdb1..e5a91ab75 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -45,6 +45,14 @@ namespace checkpoint { std::size_t, std::size_t); + void ReadParticlePayloads(adios2::IO&, + adios2::Engine&, + unsigned short, + array_t&, + std::size_t, + std::size_t, + std::size_t); + } // namespace checkpoint #endif // CHECKPOINT_READER_H diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt index 10836554b..54400652e 100644 --- a/src/checkpoint/tests/CMakeLists.txt +++ b/src/checkpoint/tests/CMakeLists.txt @@ -25,5 +25,5 @@ endfunction() if(NOT ${mpi}) gen_test(checkpoint-nompi) else() - # gen_test(checkpoint-mpi) + gen_test(checkpoint-mpi) endif() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp index 3ce4bab14..f97202ab1 100644 --- a/src/checkpoint/tests/checkpoint-mpi.cpp +++ b/src/checkpoint/tests/checkpoint-mpi.cpp @@ -39,36 +39,53 @@ auto main(int argc, char* argv[]) -> int { // | | | // | 0 | 1 | // |------|------| - constexpr auto g_nx1 = 20; - constexpr auto g_nx2 = 15; - constexpr auto g_nx1_gh = g_nx1 + 4 * N_GHOSTS; - constexpr auto g_nx2_gh = g_nx2 + 4 * N_GHOSTS; + const std::size_t g_nx1 = 20; + const std::size_t g_nx2 = 15; + const std::size_t g_nx1_gh = g_nx1 + 4 * N_GHOSTS; + const std::size_t g_nx2_gh = g_nx2 + 4 * N_GHOSTS; - constexpr auto l_nx1 = 10; - constexpr auto l_nx2 = (rank < 2) ? 10 : 5; + const std::size_t l_nx1 = 10; + const std::size_t l_nx2 = (rank < 2) ? 
10 : 5; - constexpr auto l_nx1_gh = l_nx1 + 2 * N_GHOSTS; - constexpr auto l_nx2_gh = l_nx2 + 2 * N_GHOSTS; + const std::size_t l_nx1_gh = l_nx1 + 2 * N_GHOSTS; + const std::size_t l_nx2_gh = l_nx2 + 2 * N_GHOSTS; - constexpr auto l_corner_x1 = (rank % 2) * l_nx1; - constexpr auto l_corner_x2 = (rank / 2) * l_nx2; + const std::size_t l_corner_x1 = (rank % 2 == 0) ? 0 : l_nx1_gh; + const std::size_t l_corner_x2 = (rank < 2) ? 0 : l_nx2_gh; - constexpr auto i1min = N_GHOSTS; - constexpr auto i2min = N_GHOSTS; - constexpr auto i1max = l_nx1 + N_GHOSTS; - constexpr auto i2max = l_nx2 + N_GHOSTS; + const std::size_t i1min = N_GHOSTS; + const std::size_t i2min = N_GHOSTS; + const std::size_t i1max = l_nx1 + N_GHOSTS; + const std::size_t i2max = l_nx2 + N_GHOSTS; - constexpr auto npart1 = (rank % 2 + rank) * 23 + 100; - constexpr auto npart2 = (rank % 2 + rank) * 37 + 100; + const std::size_t npart1 = (rank % 2 + rank) * 23 + 100; + const std::size_t npart2 = (rank % 2 + rank) * 37 + 100; + + std::size_t npart1_offset = 0; + std::size_t npart2_offset = 0; + + std::size_t npart1_globtot = 0; + std::size_t npart2_globtot = 0; + + for (auto r = 0; r < rank - 1; ++r) { + npart1_offset += (r % 2 + r) * 23 + 100; + npart2_offset += (r % 2 + r) * 37 + 100; + } + + for (auto r = 0; r < size; ++r) { + npart1_globtot += (r % 2 + r) * 23 + 100; + npart2_globtot += (r % 2 + r) * 37 + 100; + } // init data ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; - array_t i1 { "i_1", npart1 }; - array_t u1 { "u_1", npart1 }; - array_t i2 { "i_2", npart2 }; - array_t u2 { "u_2", npart2 }; + array_t i1 { "i_1", npart1 }; + array_t u1 { "u_1", npart1 }; + array_t i2 { "i_2", npart2 }; + array_t u2 { "u_2", npart2 }; + array_t plds1 { "plds_1", npart1, 3 }; { // fill data @@ -93,8 +110,11 @@ auto main(int argc, char* argv[]) -> int { "fillPrtl1", npart1, Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); + u1(p) = 
static_cast(p); + i1(p) = static_cast(p); + plds1(p, 0) = static_cast(p); + plds1(p, 1) = static_cast(p * p); + plds1(p, 2) = static_cast(p * p * p); }); Kokkos::parallel_for( "fillPrtl2", @@ -115,8 +135,9 @@ auto main(int argc, char* argv[]) -> int { writer.defineFieldVariables(SimEngine::GRPIC, { g_nx1_gh, g_nx2_gh }, { l_corner_x1, l_corner_x2 }, - { l_nx1, l_nx2 }); - writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 0, 0 }); + { l_nx1_gh, l_nx2_gh }); + + writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 3, 0 }); writer.beginSaving(0, 0.0); @@ -126,41 +147,66 @@ auto main(int argc, char* argv[]) -> int { writer.savePerDomainVariable("s1_npart", 1, 0, npart1); writer.savePerDomainVariable("s2_npart", 1, 0, npart2); - writer.saveParticleQuantity("s1_i1", npart1, 0, npart1, i1); - writer.saveParticleQuantity("s1_ux1", npart1, 0, npart1, u1); - writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); - writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); + writer.saveParticleQuantity("s1_i1", + npart1_globtot, + npart1_offset, + npart1, + i1); + writer.saveParticleQuantity("s1_ux1", + npart1_globtot, + npart1_offset, + npart1, + u1); + writer.saveParticleQuantity("s2_i1", + npart2_globtot, + npart2_offset, + npart2, + i2); + writer.saveParticleQuantity("s2_ux1", + npart2_globtot, + npart2_offset, + npart2, + u2); + + writer.saveParticlePayloads("s1_plds", + 3, + npart1_globtot, + npart1_offset, + npart1, + plds1); writer.endSaving(); } { // read checkpoint - ndfield_t field1_read { "fld1_read", nx1_gh, nx2_gh, nx3_gh }; - ndfield_t field2_read { "fld2_read", nx1_gh, nx2_gh, nx3_gh }; + ndfield_t field1_read { "fld1_read", l_nx1_gh, l_nx2_gh }; + ndfield_t field2_read { "fld2_read", l_nx1_gh, l_nx2_gh }; - array_t i1_read { "i_1", npart1 }; - array_t u1_read { "u_1", npart1 }; - array_t i2_read { "i_2", npart2 }; - array_t u2_read { "u_2", npart2 }; + array_t i1_read { "i_1", npart1 }; + array_t u1_read { "u_1", npart1 }; + array_t 
i2_read { "i_2", npart2 }; + array_t u2_read { "u_2", npart2 }; + array_t plds1_read { "plds_1", npart1, 3 }; adios2::IO io = adios.DeclareIO("checkpointRead"); adios2::Engine reader = io.Open("checkpoints/step-00000000.bp", adios2::Mode::Read); reader.BeginStep(); - auto fieldRange = adios2::Box({ 0, 0, 0, 0 }, - { nx1_gh, nx2_gh, nx3_gh, 6 }); - ReadFields(io, reader, "em", fieldRange, field1_read); - ReadFields(io, reader, "em0", fieldRange, field2_read); + auto fieldRange = adios2::Box({ l_corner_x1, l_corner_x2, 0 }, + { l_nx1_gh, l_nx2_gh, 6 }); + ReadFields(io, reader, "em", fieldRange, field1_read); + ReadFields(io, reader, "em0", fieldRange, field2_read); - auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, 0, 1); - auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, 0, 1); + auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, rank, size); + auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, rank, size); ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + ReadParticlePayloads(io, reader, 0, plds1_read, 3, nprtl1, noff1); reader.EndStep(); reader.Close(); @@ -168,15 +214,13 @@ auto main(int argc, char* argv[]) -> int { // check the validity Kokkos::parallel_for( "checkFields", - CreateRangePolicy({ 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }), - Lambda(index_t i1, index_t i2, index_t i3) { + CreateRangePolicy({ 0, 0 }, { l_nx1_gh, l_nx2_gh }), + Lambda(index_t i1, index_t i2) { for (int i = 0; i < 6; ++i) { - if (not cmp::AlmostEqual(field1(i1, i2, i3, i), - field1_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field1(i1, i2, i), field1_read(i1, i2, i))) { raise::KernelError(HERE, "Field1 read failed"); } - if (not cmp::AlmostEqual(field2(i1, i2, i3, i), - field2_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field2(i1, i2, i), 
field2_read(i1, i2, i))) { raise::KernelError(HERE, "Field2 read failed"); } } @@ -184,12 +228,12 @@ auto main(int argc, char* argv[]) -> int { raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); - raise::ErrorIf(noff1 != 0, "Particle offset 1 mismatch", HERE); - raise::ErrorIf(noff2 != 0, "Particle offset 2 mismatch", HERE); + raise::ErrorIf(noff1 != npart1_offset, "Particle offset 1 mismatch", HERE); + raise::ErrorIf(noff2 != npart2_offset, "Particle offset 2 mismatch", HERE); Kokkos::parallel_for( "checkPrtl1", - npart1, + nprtl1, Lambda(index_t p) { if (not cmp::AlmostEqual(u1(p), u1_read(p))) { raise::KernelError(HERE, "u1 read failed"); @@ -197,10 +241,15 @@ auto main(int argc, char* argv[]) -> int { if (i1(p) != i1_read(p)) { raise::KernelError(HERE, "i1 read failed"); } + for (auto l = 0; l < 3; ++l) { + if (not cmp::AlmostEqual(plds1(p, l), plds1_read(p, l))) { + raise::KernelError(HERE, "plds1 read failed"); + } + } }); Kokkos::parallel_for( "checkPrtl2", - npart2, + nprtl2, Lambda(index_t p) { if (not cmp::AlmostEqual(u2(p), u2_read(p))) { raise::KernelError(HERE, "u2 read failed"); diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index 9ef0b51c7..a12e3ef26 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -84,6 +84,7 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); + for (auto d { 0u }; d < dim; ++d) { m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), { adios2::UnknownDim }, @@ -102,18 +103,21 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }); } + if (dim == Dim::_2D and C != ntt::Coord::Cart) { m_io.DefineVariable(fmt::format("s%d_phi", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + for (auto d { 0u }; d < 3; ++d) { m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), { adios2::UnknownDim }, { 
adios2::UnknownDim }, { adios2::UnknownDim }); } + m_io.DefineVariable(fmt::format("s%d_tag", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -122,11 +126,11 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); - for (auto p { 0u }; p < nplds[s]; ++p) { - m_io.DefineVariable(fmt::format("s%d_pld%d", s + 1, p + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); + if (nplds[s] > 0) { + m_io.DefineVariable(fmt::format("s%d_plds", s + 1), + { adios2::UnknownDim, nplds[s] }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, nplds[s] }); } } } @@ -238,6 +242,25 @@ namespace checkpoint { m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); } + void Writer::saveParticlePayloads(const std::string& quantity, + std::size_t nplds, + std::size_t glob_total, + std::size_t loc_offset, + std::size_t loc_size, + const array_t& data) { + const auto slice = range_tuple_t(0, loc_size); + auto var = m_io.InquireVariable(quantity); + + var.SetShape({ glob_total, nplds }); + var.SetSelection( + adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); + m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); + } + template void Writer::savePerDomainVariable(const std::string&, std::size_t, std::size_t, diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 34b5f043f..346bee24a 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -69,10 +69,18 @@ namespace checkpoint { std::size_t, const array_t&); + void saveParticlePayloads(const std::string&, + std::size_t, + std::size_t, + std::size_t, + std::size_t, + const array_t&); + void defineFieldVariables(const ntt::SimEngine&, const std::vector&, const std::vector&, const std::vector&); + void defineParticleVariables(const ntt::Coord&, Dimension, std::size_t, diff 
--git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 758118d6c..fc8214824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -28,55 +28,43 @@ namespace ntt { const Cooling& cooling, unsigned short npld) : ParticleSpecies(index, label, m, ch, maxnpart, pusher, use_gca, cooling, npld) { - i1 = array_t { label + "_i1", maxnpart }; - i1_h = Kokkos::create_mirror_view(i1); - dx1 = array_t { label + "_dx1", maxnpart }; - dx1_h = Kokkos::create_mirror_view(dx1); - - i1_prev = array_t { label + "_i1_prev", maxnpart }; - dx1_prev = array_t { label + "_dx1_prev", maxnpart }; - - ux1 = array_t { label + "_ux1", maxnpart }; - ux1_h = Kokkos::create_mirror_view(ux1); - ux2 = array_t { label + "_ux2", maxnpart }; - ux2_h = Kokkos::create_mirror_view(ux2); - ux3 = array_t { label + "_ux3", maxnpart }; - ux3_h = Kokkos::create_mirror_view(ux3); - - weight = array_t { label + "_w", maxnpart }; - weight_h = Kokkos::create_mirror_view(weight); - - tag = array_t { label + "_tag", maxnpart }; - tag_h = Kokkos::create_mirror_view(tag); - - for (unsigned short n { 0 }; n < npld; ++n) { - pld.push_back(array_t("pld", maxnpart)); - pld_h.push_back(Kokkos::create_mirror_view(pld[n])); - } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - i2 = array_t { label + "_i2", maxnpart }; - i2_h = Kokkos::create_mirror_view(i2); - dx2 = array_t { label + "_dx2", maxnpart }; - dx2_h = Kokkos::create_mirror_view(dx2); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1 = array_t { label + "_i1", maxnpart }; + dx1 = array_t { label + "_dx1", maxnpart }; + i1_prev = array_t { label + "_i1_prev", maxnpart }; + dx1_prev = array_t { label + "_dx1_prev", maxnpart }; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2 = array_t { label + "_i2", maxnpart }; + dx2 = array_t { label + "_dx2", maxnpart }; i2_prev = array_t { label + "_i2_prev", maxnpart }; dx2_prev = array_t { label + 
"_dx2_prev", maxnpart }; } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - phi = array_t { label + "_phi", maxnpart }; - phi_h = Kokkos::create_mirror_view(phi); - } if constexpr (D == Dim::_3D) { - i3 = array_t { label + "_i3", maxnpart }; - i3_h = Kokkos::create_mirror_view(i3); - dx3 = array_t { label + "_dx3", maxnpart }; - dx3_h = Kokkos::create_mirror_view(dx3); - + i3 = array_t { label + "_i3", maxnpart }; + dx3 = array_t { label + "_dx3", maxnpart }; i3_prev = array_t { label + "_i3_prev", maxnpart }; dx3_prev = array_t { label + "_dx3_prev", maxnpart }; } + + ux1 = array_t { label + "_ux1", maxnpart }; + ux2 = array_t { label + "_ux2", maxnpart }; + ux3 = array_t { label + "_ux3", maxnpart }; + + weight = array_t { label + "_w", maxnpart }; + + tag = array_t { label + "_tag", maxnpart }; + + if (npld > 0) { + pld = array_t { label + "_pld", maxnpart, npld }; + } + + if ((D == Dim::_2D) && (C != Coord::Cart)) { + phi = array_t { label + "_phi", maxnpart }; + } } template @@ -205,9 +193,10 @@ namespace ntt { RemoveDeadInArray(phi, indices_alive); } - for (auto& payload : pld) { - RemoveDeadInArray(payload, indices_alive); - } + // for (auto& payload : pld) { + // // @TODO_1.2.0: fix + // RemoveDeadInArray(payload, indices_alive); + // } Kokkos::Experimental::fill( "TagAliveParticles", @@ -226,35 +215,6 @@ namespace ntt { m_is_sorted = true; } - template - void Particles::SyncHostDevice() { - Kokkos::deep_copy(i1_h, i1); - Kokkos::deep_copy(dx1_h, dx1); - Kokkos::deep_copy(ux1_h, ux1); - Kokkos::deep_copy(ux2_h, ux2); - Kokkos::deep_copy(ux3_h, ux3); - - Kokkos::deep_copy(tag_h, tag); - Kokkos::deep_copy(weight_h, weight); - - for (auto n { 0 }; n < npld(); ++n) { - Kokkos::deep_copy(pld_h[n], pld[n]); - } - - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Kokkos::deep_copy(i2_h, i2); - Kokkos::deep_copy(dx2_h, dx2); - } - if constexpr (D == Dim::_3D) { - Kokkos::deep_copy(i3_h, i3); - Kokkos::deep_copy(dx3_h, dx3); - } - - if ((D == Dim::_2D) && (C 
!= Coord::Cart)) { - Kokkos::deep_copy(phi_h, phi); - } - } - template struct Particles; template struct Particles; template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 3ae68b402..9024fef1e 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -48,31 +48,22 @@ namespace ntt { public: // Cell indices of the current particle - array_t i1, i2, i3; + array_t i1, i2, i3; // Displacement of a particle within the cell - array_t dx1, dx2, dx3; + array_t dx1, dx2, dx3; // Three spatial components of the covariant 4-velocity (physical units) - array_t ux1, ux2, ux3; + array_t ux1, ux2, ux3; // Particle weights. - array_t weight; + array_t weight; // Previous timestep coordinates - array_t i1_prev, i2_prev, i3_prev; - array_t dx1_prev, dx2_prev, dx3_prev; + array_t i1_prev, i2_prev, i3_prev; + array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles - array_t tag; - // Array to store the particle load - std::vector> pld; + array_t tag; + // Array to store the particle payloads + array_t pld; // phi coordinate (for axisymmetry) - array_t phi; - - // host mirrors - array_mirror_t i1_h, i2_h, i3_h; - array_mirror_t dx1_h, dx2_h, dx3_h; - array_mirror_t ux1_h, ux2_h, ux3_h; - array_mirror_t weight_h; - array_mirror_t phi_h; - array_mirror_t tag_h; - std::vector> pld_h; + array_t phi; // for empty allocation Particles() {} @@ -178,10 +169,8 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - for (auto& p : pld) { - footprint += sizeof(real_t) * p.extent(0); - } - footprint += sizeof(real_t) * phi.extent(0); + footprint += sizeof(real_t) * pld.extent(0) * pld.extent(1); + footprint += sizeof(real_t) * phi.extent(0); return footprint; } diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 3d309c090..6dfb137db 
100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -242,13 +242,13 @@ namespace ntt { local_domain->species[s].weight); auto nplds = local_domain->species[s].npld(); - for (auto p { 0u }; p < nplds; ++p) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_pld%d", s + 1, p + 1), - glob_tot, - offset, - npart, - local_domain->species[s].pld[p]); + if (nplds > 0) { + g_checkpoint_writer.saveParticlePayloads(fmt::format("s%d_plds", s + 1), + nplds, + glob_tot, + offset, + npart, + local_domain->species[s].pld); } } } @@ -451,14 +451,16 @@ namespace ntt { domain.species[s].weight, loc_npart, offset_npart); - for (auto p { 0u }; p < domain.species[s].npld(); ++p) { - checkpoint::ReadParticleData(io, - reader, - fmt::format("pld%d", p + 1), - s, - domain.species[s].pld[p], - loc_npart, - offset_npart); + + const auto nplds = domain.species[s].npld(); + if (nplds > 0) { + checkpoint::ReadParticlePayloads(io, + reader, + s, + domain.species[s].pld, + nplds, + loc_npart, + offset_npart); } domain.species[s].set_npart(loc_npart); } // species loop diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index eb77ecbb3..4f6e04eec 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -17,6 +17,7 @@ #include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" +#include "arch/mpi_tags.h" #include "utils/error.h" #include "framework/containers/particles.h" diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index dabcc062f..535198286 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,10 +46,8 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.size() != npld, "Number of payloads 
mismatch", HERE); - for (unsigned short n { 0 }; n < npld; ++n) { - raise::ErrorIf(p.pld[n].extent(0) != maxnpart, "pld incorrectly allocated", HERE); - } + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { raise::ErrorIf(p.i2.extent(0) != maxnpart, "i2 incorrectly allocated", HERE); @@ -139,4 +137,4 @@ auto main(int argc, char** argv) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/arch/mpi_tags.h b/src/global/arch/mpi_tags.h index 0916542d4..aaf38a8f4 100644 --- a/src/global/arch/mpi_tags.h +++ b/src/global/arch/mpi_tags.h @@ -7,6 +7,8 @@ * @namespaces: * - mpi:: */ +#ifndef GLOBAL_ARCH_MPI_TAGS_H +#define GLOBAL_ARCH_MPI_TAGS_H #include "global.h" @@ -188,8 +190,13 @@ namespace mpi { tag; } - Inline auto SendTag(short tag, bool im1, bool ip1, bool jm1, bool jp1, bool km1, bool kp1) - -> short { + Inline auto SendTag(short tag, + bool im1, + bool ip1, + bool jm1, + bool jp1, + bool km1, + bool kp1) -> short { return ((im1 && jm1 && km1) * (PrtlSendTag::im1_jm1_km1 - 1) + (im1 && jm1 && kp1) * (PrtlSendTag::im1_jm1_kp1 - 1) + (im1 && jp1 && km1) * (PrtlSendTag::im1_jp1_km1 - 1) + @@ -226,3 +233,5 @@ namespace mpi { tag; } } // namespace mpi + +#endif // GLOBAL_ARCH_MPI_TAGS_H From ad026630c8814cff062683a9c6e27b693ac26e57 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 14:52:57 -0500 Subject: [PATCH 086/124] comm kernels --- src/framework/domain/comm_mpi.hpp | 145 ++++------- src/framework/domain/communications.cpp | 55 ++--- src/kernels/comm.hpp | 309 ++++++++++++++++++++++++ 3 files changed, 369 insertions(+), 140 deletions(-) create mode 100644 src/kernels/comm.hpp diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 4f6e04eec..ff4984fa3 100644 --- a/src/framework/domain/comm_mpi.hpp +++ 
b/src/framework/domain/comm_mpi.hpp @@ -22,6 +22,8 @@ #include "framework/containers/particles.h" +#include "kernels/comm.hpp" + #include #include @@ -318,34 +320,14 @@ namespace comm { } template - void CommunicateParticles(Particles& species, - Kokkos::View outgoing_indices, - Kokkos::View tag_offsets, - std::vector npptag_vec, - std::vector npptag_recv_vec, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& dirs_to_comm) { - // Pointers to the particle data arrays - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; - auto& this_dx1 = species.dx1; - auto& this_dx1_prev = species.dx1_prev; - auto& this_dx2 = species.dx2; - auto& this_dx2_prev = species.dx2_prev; - auto& this_dx3 = species.dx3; - auto& this_dx3_prev = species.dx3_prev; - auto& this_phi = species.phi; - auto& this_ux1 = species.ux1; - auto& this_ux2 = species.ux2; - auto& this_ux3 = species.ux3; - auto& this_weight = species.weight; - auto& this_tag = species.tag; - + void CommunicateParticles(Particles& species, + array_t outgoing_indices, + array_t tag_offsets, + std::vector npptag_vec, + std::vector npptag_recv_vec, + std::vector send_ranks, + std::vector recv_ranks, + const dir::dirs_t& dirs_to_comm) { // @TODO_1.2.0: communicate payloads // number of arrays of each type to send/recv @@ -365,10 +347,9 @@ namespace comm { npptag_recv_vec.end(), static_cast(0)); - Kokkos::View recv_buff_int { "recv_buff_int", npart_recv * NINTS }; - Kokkos::View recv_buff_real { "recv_buff_real", npart_recv * NREALS }; - Kokkos::View recv_buff_prtldx { "recv_buff_prtldx", - npart_recv * NPRTLDX }; + array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; auto iteration = 0; auto 
current_received = 0; @@ -383,44 +364,26 @@ namespace comm { if (send_rank < 0 and recv_rank < 0) { continue; } - Kokkos::View send_buff_int { "send_buff_int", npart_send_in * NINTS }; - Kokkos::View send_buff_real { "send_buff_real", - npart_send_in * NREALS }; - Kokkos::View send_buff_prtldx { "send_buff_prtldx", - npart_send_in * NPRTLDX }; + array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; + array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; + array_t send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + // clang-format off Kokkos::parallel_for( "PopulateSendBuffer", npart_send_in, - Lambda(index_t p) { - const auto idx = outgoing_indices( - (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead + p); - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - send_buff_int(NINTS * p + 0) = this_i1(idx); - send_buff_int(NINTS * p + 1) = this_i1_prev(idx); - send_buff_prtldx(NPRTLDX * p + 0) = this_dx1(idx); - send_buff_prtldx(NPRTLDX * p + 1) = this_dx1_prev(idx); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - send_buff_int(NINTS * p + 2) = this_i2(idx); - send_buff_int(NINTS * p + 3) = this_i2_prev(idx); - send_buff_prtldx(NPRTLDX * p + 2) = this_dx2(idx); - send_buff_prtldx(NPRTLDX * p + 3) = this_dx2_prev(idx); - } - if constexpr (D == Dim::_3D) { - send_buff_int(NINTS * p + 4) = this_i3(idx); - send_buff_int(NINTS * p + 5) = this_i3_prev(idx); - send_buff_prtldx(NPRTLDX * p + 4) = this_dx3(idx); - send_buff_prtldx(NPRTLDX * p + 5) = this_dx3_prev(idx); - } - send_buff_real(NREALS * p + 0) = this_ux1(idx); - send_buff_real(NREALS * p + 1) = this_ux2(idx); - send_buff_real(NREALS * p + 2) = this_ux3(idx); - send_buff_real(NREALS * p + 3) = this_weight(idx); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - send_buff_real(NREALS * p + 4) = this_phi(idx); - } - this_tag(idx) = ParticleTag::dead; - }); + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, 
send_buff_prtldx, + NINTS, NREALS, NPRTLDX, + (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices, tag_offsets) + ); + // clang-format on const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; @@ -519,43 +482,25 @@ namespace comm { } // end direction loop - const auto npart = species.npart(); - const auto npart_holes = outgoing_indices.extent(0); - + // clang-format off Kokkos::parallel_for( "PopulateFromRecvBuffer", npart_recv, - Lambda(const std::size_t p) { - const auto idx = (p >= npart_holes ? npart + p - npart_holes - : outgoing_indices(p)); - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - this_i1(idx) = recv_buff_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buff_int(NINTS * p + 1); - this_dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); - this_dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - this_i2(idx) = recv_buff_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buff_int(NINTS * p + 3); - this_dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); - this_dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); - } - if constexpr (D == Dim::_3D) { - this_i3(idx) = recv_buff_int(NINTS * p + 4); - this_i3_prev(idx) = recv_buff_int(NINTS * p + 5); - this_dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); - this_dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); - } - this_ux1(idx) = recv_buff_real(NREALS * p + 0); - this_ux2(idx) = recv_buff_real(NREALS * p + 1); - this_ux3(idx) = recv_buff_real(NREALS * p + 2); - this_weight(idx) = recv_buff_real(NREALS * p + 3); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - this_phi(idx) = recv_buff_real(NREALS 
* p + 4); - } - this_tag(idx) = ParticleTag::alive; - }); + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, + NINTS, NREALS, NPRTLDX, + species.npart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); if (npart_recv > npart_holes) { species.set_npart(npart + npart_recv - npart_holes); } diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 6175cc4bb..fc065ab9d 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -20,6 +20,7 @@ #include "arch/mpi_tags.h" #include "framework/domain/comm_mpi.hpp" + #include "kernels/comm.hpp" #else #include "framework/domain/comm_nompi.hpp" #endif @@ -601,50 +602,24 @@ namespace ntt { } } // end directions loop - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; + array_t outgoing_indices { "outgoing_indices", + npart - npart_alive }; - array_t outgoing_indices("outgoing_indices", - npart - npart_alive); - - array_t current_offset("current_offset", ntags); + // clang-format off Kokkos::parallel_for( "OutgoingIndicesAndDisplace", species.rangeActiveParticles(), - Lambda(index_t p) { - if (this_tag(p) != ParticleTag::alive) { - // dead or to-be-sent - const auto idx_for_tag = - Kokkos::atomic_fetch_add(¤t_offset(this_tag(p)), 1) + - (this_tag(p) != ParticleTag::dead ? npart_dead : 0) + - (this_tag(p) > 2 ? 
tag_offsets(this_tag(p) - 3) : 0); - if (idx_for_tag >= npart - npart_alive) { - raise::KernelError(HERE, - "Outgoing indices idx exceeds the array size"); - } - outgoing_indices(idx_for_tag) = p; - // apply offsets - if (this_tag(p) != ParticleTag::dead) { - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - this_i1(p) += shifts_in_x1(this_tag(p) - 2); - this_i1_prev(p) += shifts_in_x1(this_tag(p) - 2); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - this_i2(p) += shifts_in_x2(this_tag(p) - 2); - this_i2_prev(p) += shifts_in_x2(this_tag(p) - 2); - } - if constexpr (D == Dim::_3D) { - this_i3(p) += shifts_in_x3(this_tag(p) - 2); - this_i3_prev(p) += shifts_in_x3(this_tag(p) - 2); - } - } - } - }); + kernel::comm::PrepareOutgoingPrtls_kernel( + shifts_in_x1, shifts_in_x2, shifts_in_x3, + outgoing_indices, + npart, npart_alive, npart_dead, ntags, + species.i1, species.i1_prev, + species.i2, species.i2_prev, + species.i3, species.i3_prev, + species.tag, tag_offsets) + ); + // clang-format on + comm::CommunicateParticles(species, outgoing_indices, tag_offsets, diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp new file mode 100644 index 000000000..c470cd4f6 --- /dev/null +++ b/src/kernels/comm.hpp @@ -0,0 +1,309 @@ +/** + * @file kernels/comm.hpp + * @brief Kernels used during communications + * @implements + * - kernel::comm::PrepareOutgoingPrtls_kernel<> + * - kernel::comm::PopulatePrtlSendBuffer_kernel<> + * - kernel::comm::ExtractReceivedPrtls_kernel<> + * @namespaces: + * - kernel::comm:: + */ + +#ifndef KERNELS_COMM_HPP +#define KERNELS_COMM_HPP + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" + +#include + +namespace kernel::comm { + using namespace ntt; + + template + class PrepareOutgoingPrtls_kernel { + const array_t shifts_in_x1, shifts_in_x2, shifts_in_x3; + array_t outgoing_indices; + + const std::size_t npart, npart_alive, npart_dead, ntags; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + 
const array_t tag; + + const array_t tag_offsets; + + array_t current_offset; + + public: + PrepareOutgoingPrtls_kernel(const array_t& shifts_in_x1, + const array_t& shifts_in_x2, + const array_t& shifts_in_x3, + array_t& outgoing_indices, + std::size_t npart, + std::size_t npart_alive, + std::size_t npart_dead, + std::size_t ntags, + array_t& i1, + array_t& i1_prev, + array_t& i2, + array_t& i2_prev, + array_t& i3, + array_t& i3_prev, + const array_t& tag, + const array_t& tag_offsets) + : shifts_in_x1 { shifts_in_x1 } + , shifts_in_x2 { shifts_in_x2 } + , shifts_in_x3 { shifts_in_x3 } + , outgoing_indices { outgoing_indices } + , npart { npart } + , npart_alive { npart_alive } + , npart_dead { npart_dead } + , ntags { ntags } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , tag { tag } + , tag_offsets { tag_offsets } + , current_offset { "current_offset", ntags } {} + + Inline void operator()(index_t p) const { + if (tag(p) != ParticleTag::alive) { + // dead or to-be-sent + const auto idx_for_tag = Kokkos::atomic_fetch_add(¤t_offset(tag(p)), + 1) + + (tag(p) != ParticleTag::dead ? npart_dead : 0) + + (tag(p) > 2 ? 
tag_offsets(tag(p) - 3) : 0); + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); + } + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(p) += shifts_in_x1(tag(p) - 2); + i1_prev(p) += shifts_in_x1(tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(p) += shifts_in_x2(tag(p) - 2); + i2_prev(p) += shifts_in_x2(tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + i3(p) += shifts_in_x3(tag(p) - 2); + i3_prev(p) += shifts_in_x3(tag(p) - 2); + } + } + } + } + }; + + template + class PopulatePrtlSendBuffer_kernel { + array_t send_buff_int; + array_t send_buff_real; + array_t send_buff_prtldx; + + const unsigned short NINTS, NREALS, NPRTLDX; + const std::size_t idx_offset; + + const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t tag; + const array_t outgoing_indices, tag_offsets; + + public: + PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, + array_t& send_buff_real, + array_t& send_buff_prtldx, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + std::size_t idx_offset, + const array_t& i1, + const array_t& i1_prev, + const array_t& dx1, + const array_t& dx1_prev, + const array_t& i2, + const array_t& i2_prev, + const array_t& dx2, + const array_t& dx2_prev, + const array_t& i3, + const array_t& i3_prev, + const array_t& dx3, + const array_t& dx3_prev, + const array_t& ux1, + const array_t& ux2, + const array_t& ux3, + const array_t& weight, + const array_t& phi, + array_t& tag, + const array_t& outgoing_indices, + const array_t& tag_offsets) + : send_buff_int { send_buff_int } + , send_buff_real { send_buff_real } + , send_buff_prtldx { send_buff_prtldx } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , 
idx_offset { idx_offset } + , i1 { i1 } + , i1_prev { i1_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , tag { tag } + , outgoing_indices { outgoing_indices } + , tag_offsets { tag_offsets } {} + + Inline void operator()(index_t p) const { + const auto idx = outgoing_indices(idx_offset + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = i1(idx); + send_buff_int(NINTS * p + 1) = i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = dx1_prev(idx); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 2) = i2(idx); + send_buff_int(NINTS * p + 3) = i2_prev(idx); + send_buff_prtldx(NPRTLDX * p + 2) = dx2(idx); + send_buff_prtldx(NPRTLDX * p + 3) = dx2_prev(idx); + } + if constexpr (D == Dim::_3D) { + send_buff_int(NINTS * p + 4) = i3(idx); + send_buff_int(NINTS * p + 5) = i3_prev(idx); + send_buff_prtldx(NPRTLDX * p + 4) = dx3(idx); + send_buff_prtldx(NPRTLDX * p + 5) = dx3_prev(idx); + } + send_buff_real(NREALS * p + 0) = ux1(idx); + send_buff_real(NREALS * p + 1) = ux2(idx); + send_buff_real(NREALS * p + 2) = ux3(idx); + send_buff_real(NREALS * p + 3) = weight(idx); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + send_buff_real(NREALS * p + 4) = phi(idx); + } + tag(idx) = ParticleTag::dead; + } + }; + + template + class ExtractReceivedPrtls_kernel { + const array_t recv_buff_int; + const array_t recv_buff_real; + const array_t recv_buff_prtldx; + + const unsigned short NINTS, NREALS, NPRTLDX; + const std::size_t npart, npart_holes; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t tag; 
+ const array_t outgoing_indices; + + public: + ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, + const array_t& recv_buff_real, + const array_t& recv_buff_prtldx, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + std::size_t npart, + array_t& i1, + array_t& i1_prev, + array_t& dx1, + array_t& dx1_prev, + array_t& i2, + array_t& i2_prev, + array_t& dx2, + array_t& dx2_prev, + array_t& i3, + array_t& i3_prev, + array_t& dx3, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& weight, + array_t& phi, + array_t& tag, + const array_t& outgoing_indices) + : recv_buff_int { recv_buff_int } + , recv_buff_real { recv_buff_real } + , recv_buff_prtldx { recv_buff_prtldx } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , npart { npart } + , npart_holes { outgoing_indices.extent(0) } + , i1 { i1 } + , i1_prev { i1_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , tag { tag } {} + + Inline void operator()(index_t p) const { + const auto idx = (p >= npart_holes ? 
npart + p - npart_holes + : outgoing_indices(p)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(idx) = recv_buff_int(NINTS * p + 0); + i1_prev(idx) = recv_buff_int(NINTS * p + 1); + dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(idx) = recv_buff_int(NINTS * p + 2); + i2_prev(idx) = recv_buff_int(NINTS * p + 3); + dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + i3(idx) = recv_buff_int(NINTS * p + 4); + i3_prev(idx) = recv_buff_int(NINTS * p + 5); + dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + ux1(idx) = recv_buff_real(NREALS * p + 0); + ux2(idx) = recv_buff_real(NREALS * p + 1); + ux3(idx) = recv_buff_real(NREALS * p + 2); + weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + phi(idx) = recv_buff_real(NREALS * p + 4); + } + tag(idx) = ParticleTag::alive; + } + }; + +} // namespace kernel::comm + +#endif // KERNELS_COMM_HPP From ea4366bc69ef3ca366368ead9c200b28b9d5c502 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 15:20:50 -0500 Subject: [PATCH 087/124] minor fix in tags --- src/framework/domain/comm_mpi.hpp | 2 +- src/kernels/comm.hpp | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ff4984fa3..70c175972 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -381,7 +381,7 @@ namespace comm { species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, species.weight, species.phi, species.tag, - outgoing_indices, tag_offsets) + outgoing_indices) ); // clang-format on diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index 
c470cd4f6..7446d285a 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -74,10 +74,15 @@ namespace kernel::comm { Inline void operator()(index_t p) const { if (tag(p) != ParticleTag::alive) { // dead or to-be-sent - const auto idx_for_tag = Kokkos::atomic_fetch_add(¤t_offset(tag(p)), - 1) + - (tag(p) != ParticleTag::dead ? npart_dead : 0) + - (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); + auto idx_for_tag = Kokkos::atomic_fetch_add(¤t_offset(tag(p)), 1); + if (tag(p) != ParticleTag::dead) { + idx_for_tag += npart_dead; + } + if (tag(p) > 2) { + idx_for_tag += tag_offsets(tag(p) - 3); + } + // (tag(p) != ParticleTag::dead ? npart_dead : 0) + + // (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); if (idx_for_tag >= npart - npart_alive) { raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); } @@ -114,7 +119,7 @@ namespace kernel::comm { const array_t ux1, ux2, ux3, weight, phi; const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; array_t tag; - const array_t outgoing_indices, tag_offsets; + const array_t outgoing_indices; public: PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, @@ -142,8 +147,7 @@ namespace kernel::comm { const array_t& weight, const array_t& phi, array_t& tag, - const array_t& outgoing_indices, - const array_t& tag_offsets) + const array_t& outgoing_indices) : send_buff_int { send_buff_int } , send_buff_real { send_buff_real } , send_buff_prtldx { send_buff_prtldx } @@ -169,8 +173,7 @@ namespace kernel::comm { , weight { weight } , phi { phi } , tag { tag } - , outgoing_indices { outgoing_indices } - , tag_offsets { tag_offsets } {} + , outgoing_indices { outgoing_indices } {} Inline void operator()(index_t p) const { const auto idx = outgoing_indices(idx_offset + p); From 29ef300bd920a7f3e6285e6697f73105d7321416 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 15:44:41 -0500 Subject: [PATCH 088/124] proper tag_offsets access --- dev/nix/shell.nix | 2 +- src/framework/containers/particles.cpp | 13 
+++++++------ src/framework/domain/comm_mpi.hpp | 8 ++++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 22358a837..f9d48fbfd 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -38,7 +38,7 @@ pkgs.mkShell { ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ - pkgs.clang19Stdenv.cc.cc + pkgs.stdenv.cc.cc pkgs.zlib ]); diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index fc8214824..235358760 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -97,15 +97,16 @@ namespace ntt { } // count the offsets on the host and copy to device - array_t tag_offset("tag_offset", num_tags - 3); - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + array_t tag_offsets("tag_offsets", num_tags - 3); + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - for (auto t { 0u }; t < num_tags - 3; ++t) { - tag_offset_h(t) = npptag_vec[t + 2] + (t > 0u ? 
tag_offset_h(t - 1) : 0); + tag_offsets_h(0) = npptag_vec[2]; + for (auto t { 1u }; t < num_tags - 3; ++t) { + tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } - Kokkos::deep_copy(tag_offset, tag_offset_h); + Kokkos::deep_copy(tag_offsets, tag_offsets_h); - return { npptag_vec, tag_offset }; + return { npptag_vec, tag_offsets }; } template diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 70c175972..68248db13 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -368,14 +368,18 @@ namespace comm { array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + + std::size_t idx_offset = npart_dead; + if (tag_send > 2) { + idx_offset += tag_offsets(tag_send - 3); + } // clang-format off Kokkos::parallel_for( "PopulateSendBuffer", npart_send_in, kernel::comm::PopulatePrtlSendBuffer_kernel( send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, - (tag_send > 2 ? 
tag_offsets(tag_send - 3) : 0) + npart_dead, + NINTS, NREALS, NPRTLDX, idx_offset, species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, From 3c8536392f38325df4338509b5ebb7838a884cc2 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 17:39:41 -0500 Subject: [PATCH 089/124] minor --- src/framework/domain/comm_mpi.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 68248db13..24f17015f 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -320,14 +320,14 @@ namespace comm { } template - void CommunicateParticles(Particles& species, - array_t outgoing_indices, - array_t tag_offsets, - std::vector npptag_vec, - std::vector npptag_recv_vec, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& dirs_to_comm) { + void CommunicateParticles(Particles& species, + const array_t& outgoing_indices, + const array_t& tag_offsets, + const std::vector& npptag_vec, + const std::vector& npptag_recv_vec, + const std::vector& send_ranks, + const std::vector& recv_ranks, + const dir::dirs_t& dirs_to_comm) { // @TODO_1.2.0: communicate payloads // number of arrays of each type to send/recv From d376b6e6fb84b795dd1b94a58f0da41e715d4be7 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 23 Jan 2025 17:54:38 -0500 Subject: [PATCH 090/124] minor bug: tag access --- src/framework/domain/comm_mpi.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 24f17015f..f001738cf 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -369,9 +369,12 @@ namespace comm { array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + auto tag_offsets_h = 
Kokkos::create_mirror_view(tag_offsets); + Kokkos::deep_copy(tag_offsets_h, tag_offsets); + std::size_t idx_offset = npart_dead; if (tag_send > 2) { - idx_offset += tag_offsets(tag_send - 3); + idx_offset += tag_offsets_h(tag_send - 3); } // clang-format off Kokkos::parallel_for( From f7ec06e55f202efb0c11c21eb3c3bd809e9e2329 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 19:38:03 -0500 Subject: [PATCH 091/124] inline if patched --- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/kernels/comm.hpp | 8 ++++++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/extern/Kokkos b/extern/Kokkos index eb11070f6..abaec2e5d 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 +Subproject commit abaec2e5da1e15e367e48d2a3aa649770e8bcc72 diff --git a/extern/adios2 b/extern/adios2 index f80ad829d..b574cc9c2 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit f80ad829d751241140c40923503e1888e27e22e1 +Subproject commit b574cc9c29b19448ed9f279c4966c97740328441 diff --git a/extern/plog b/extern/plog index 85a871b13..94899e0b9 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit 94899e0b926ac1b0f4750bfbd495167b4a6ae9ef diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index 7446d285a..a24ee897e 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -276,8 +276,12 @@ namespace kernel::comm { , tag { tag } {} Inline void operator()(index_t p) const { - const auto idx = (p >= npart_holes ? 
npart + p - npart_holes - : outgoing_indices(p)); + std::size_t idx; + if (p >= npart_holes) { + idx = npart + p - npart_holes; + } else { + idx = outgoing_indices(p); + } if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1(idx) = recv_buff_int(NINTS * p + 0); i1_prev(idx) = recv_buff_int(NINTS * p + 1); From fed4e62803ada0d808ba012d1772b37425a3c546 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Jan 2025 13:28:12 -0500 Subject: [PATCH 092/124] bug on gpus fixed --- setups/srpic/blob/pgen.hpp | 49 ++++++++-------- src/framework/containers/particles.cpp | 63 ++++++++++++++++++++- src/framework/containers/particles.h | 2 + src/framework/domain/comm_mpi.hpp | 75 +++++++++++++------------ src/framework/domain/communications.cpp | 43 +++++++------- src/kernels/comm.hpp | 8 ++- 6 files changed, 153 insertions(+), 87 deletions(-) diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp index 38b3db1c5..f7b7d71b5 100644 --- a/setups/srpic/blob/pgen.hpp +++ b/setups/srpic/blob/pgen.hpp @@ -21,17 +21,17 @@ namespace user { CounterstreamEnergyDist(const M& metric, real_t v_max) : arch::EnergyDistribution { metric } , v_max { v_max } {} - + Inline void operator()(const coord_t& x_Ph, vec_t& v, unsigned short sp) const override { v[0] = v_max; } - + private: const real_t v_max; }; - + template struct GaussianDist : public arch::SpatialDistribution { GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) @@ -39,20 +39,20 @@ namespace user { , x1c { x1c } , x2c { x2c } , dr { dr } {} - + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr){ - return 1.0; - }else{ - return 0.0; - } + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; + } } private: const real_t x1c, x2c, dr; }; - + template struct PGen : public 
arch::ProblemGenerator { @@ -78,24 +78,23 @@ namespace user { , dr { p.template get("setup.dr") } {} inline void InitPrtls(Domain& local_domain) { - const auto energy_dist = CounterstreamEnergyDist( - local_domain.mesh.metric, - v_max); + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); const auto spatial_dist = GaussianDist(local_domain.mesh.metric, - x1c, - x2c, - dr); - const auto injector = - arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); arch::InjectNonUniform>( - params, - local_domain, - injector, - 1.0); + params, + local_domain, + injector, + 1.0); } }; diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 235358760..d2eed1b81 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -4,6 +4,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "utils/sorting.h" #include "framework/containers/species.h" @@ -12,6 +13,8 @@ #include #include +#include +#include #include #include #include @@ -72,7 +75,7 @@ namespace ntt { -> std::pair, array_t> { auto this_tag = tag; const auto num_tags = ntags(); - array_t npptag("nparts_per_tag", ntags()); + array_t npptag { "nparts_per_tag", ntags() }; // count # of particles per each tag auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); @@ -100,7 +103,7 @@ namespace ntt { array_t tag_offsets("tag_offsets", num_tags - 3); auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - tag_offsets_h(0) = npptag_vec[2]; + tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 for (auto t { 1u }; t < num_tags - 3; ++t) { tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } @@ -133,7 +136,7 @@ namespace ntt { Kokkos::parallel_reduce( "CountDeadAlive", rangeActiveParticles(), - Lambda(index_t p, 
std::size_t & nalive, std::size_t & ndead) { + Lambda(index_t p, std::size_t& nalive, std::size_t& ndead) { nalive += (this_tag(p) == ParticleTag::alive); ndead += (this_tag(p) == ParticleTag::dead); if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { @@ -216,6 +219,60 @@ namespace ntt { m_is_sorted = true; } + // template + // void Particles::PrintTags() { + // auto tag_h = Kokkos::create_mirror_view(tag); + // Kokkos::deep_copy(tag_h, tag); + // auto i1_h = Kokkos::create_mirror_view(i1); + // Kokkos::deep_copy(i1_h, i1); + // auto dx1_h = Kokkos::create_mirror_view(dx1); + // Kokkos::deep_copy(dx1_h, dx1); + // std::cout << "species " << label() << " [npart = " << npart() << "]" + // << std::endl; + // std::cout << "idxs: "; + // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << i << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (tag_h.extent(0) > 30) { + // std::cout << "... " << std::setw(3) << tag_h.extent(0) - 1; + // } + // std::cout << std::endl << "tags: "; + // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << (short)tag_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (tag_h.extent(0) > 30) { + // std::cout << "..." << std::setw(3) << (short)tag_h(tag_h.extent(0) - 1); + // } + // std::cout << std::endl << "i1s : "; + // for (auto i = 0; i < IMIN(i1_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << i1_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (i1_h.extent(0) > 30) { + // std::cout << "..." 
<< std::setw(3) << i1_h(i1_h.extent(0) - 1); + // } + // std::cout << std::endl << "dx1s : "; + // for (auto i = 0; i < IMIN(dx1_h.extent(0), 30); ++i) { + // std::cout << std::setprecision(2) << std::setw(3) << dx1_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (dx1_h.extent(0) > 30) { + // std::cout << "..." << std::setprecision(2) << std::setw(3) + // << dx1_h(dx1_h.extent(0) - 1); + // } + // std::cout << std::endl; + // } + template struct Particles; template struct Particles; template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 9024fef1e..d84bd0cc9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -219,6 +219,8 @@ namespace ntt { * @brief Copy particle data from device to host. */ void SyncHostDevice(); + + // void PrintTags(); }; } // namespace ntt diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index f001738cf..b477e47f7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -340,13 +340,12 @@ namespace comm { MPI_Comm_rank(MPI_COMM_WORLD, &rank); // buffers to store recv data - const auto npart_alive = npptag_vec[ParticleTag::alive]; - const auto npart_dead = npptag_vec[ParticleTag::dead]; - const auto npart_send = outgoing_indices.extent(0) - npart_dead; - const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), npptag_recv_vec.end(), static_cast(0)); - array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; @@ -376,21 +375,23 @@ namespace 
comm { if (tag_send > 2) { idx_offset += tag_offsets_h(tag_send - 3); } - // clang-format off - Kokkos::parallel_for( - "PopulateSendBuffer", - npart_send_in, - kernel::comm::PopulatePrtlSendBuffer_kernel( - send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, idx_offset, - species.i1, species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, - outgoing_indices) - ); - // clang-format on + if (npart_send_in > 0) { + // clang-format off + Kokkos::parallel_for( + "PopulatePrtlSendBuffer", + npart_send_in, + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, send_buff_prtldx, + NINTS, NREALS, NPRTLDX, idx_offset, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + } const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; @@ -489,22 +490,24 @@ namespace comm { } // end direction loop - // clang-format off - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - npart_recv, - kernel::comm::ExtractReceivedPrtls_kernel( - recv_buff_int, recv_buff_real, recv_buff_prtldx, - NINTS, NREALS, NPRTLDX, - species.npart(), - species.i1, species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, - outgoing_indices) - ); - // clang-format on + if (npart_recv > 0) { + // clang-format off + Kokkos::parallel_for( + 
"ExtractReceivedPrtls", + npart_recv, + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, + NINTS, NREALS, NPRTLDX, + species.npart(), species.maxnpart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + } const auto npart = species.npart(); const auto npart_holes = outgoing_indices.extent(0); diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index fc065ab9d..7dc5d285a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -36,10 +36,10 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction) -> std::pair { + auto GetSendRecvRanks(Metadomain* metadomain, + Domain& domain, + dir::direction_t direction) + -> std::pair { Domain* send_to_nghbr_ptr = nullptr; Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains @@ -119,11 +119,11 @@ namespace ntt { } template - auto GetSendRecvParams( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) -> std::pair { + auto GetSendRecvParams(Metadomain* metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) + -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; @@ -512,11 +512,15 @@ namespace ntt { // # of particles to receive per each tag (direction) std::vector npptag_recv_vec(ntags - 2, 0); // coordinate shifts per each direction - array_t shifts_in_x1("shifts_in_x1", ntags - 2); - array_t shifts_in_x2("shifts_in_x2", ntags - 2); - 
array_t shifts_in_x3("shifts_in_x3", ntags - 2); + array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; + array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; + array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + // all directions requiring communication - dir::dirs_t dirs_to_comm; + dir::dirs_t dirs_to_comm; // ranks & indices of meshblock to send/recv from std::vector send_ranks, send_inds; @@ -568,7 +572,6 @@ namespace ntt { // ... tag_send - 2: because we only shift tags > 2 (i.e. no dead/alive) if (is_sending) { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); if (direction[0] == -1) { // sending backwards in x1 (add sx1 of target meshblock) shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( @@ -577,37 +580,35 @@ namespace ntt { // sending forward in x1 (subtract sx1 of source meshblock) shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } - Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); } if constexpr (D == Dim::_2D || D == Dim::_3D) { - auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); if (direction[1] == -1) { shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( in::x2); } else if (direction[1] == 1) { shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); } if constexpr (D == Dim::_3D) { - auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); if (direction[2] == -1) { shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( in::x3); } else if (direction[2] == 1) { shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } - Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); } } } // end directions loop + Kokkos::deep_copy(shifts_in_x1, 
shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); + array_t outgoing_indices { "outgoing_indices", npart - npart_alive }; - // clang-format off Kokkos::parallel_for( - "OutgoingIndicesAndDisplace", + "PrepareOutgoingPrtls", species.rangeActiveParticles(), kernel::comm::PrepareOutgoingPrtls_kernel( shifts_in_x1, shifts_in_x2, shifts_in_x3, diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index a24ee897e..eea79b08b 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -81,8 +81,6 @@ namespace kernel::comm { if (tag(p) > 2) { idx_for_tag += tag_offsets(tag(p) - 3); } - // (tag(p) != ParticleTag::dead ? npart_dead : 0) + - // (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); if (idx_for_tag >= npart - npart_alive) { raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); } @@ -214,6 +212,7 @@ namespace kernel::comm { const unsigned short NINTS, NREALS, NPRTLDX; const std::size_t npart, npart_holes; + const std::size_t maxnpart; array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; array_t ux1, ux2, ux3, weight, phi; @@ -229,6 +228,7 @@ namespace kernel::comm { unsigned short NREALS, unsigned short NPRTLDX, std::size_t npart, + std::size_t maxnpart, array_t& i1, array_t& i1_prev, array_t& dx1, @@ -255,6 +255,7 @@ namespace kernel::comm { , NREALS { NREALS } , NPRTLDX { NPRTLDX } , npart { npart } + , maxnpart { maxnpart } , npart_holes { outgoing_indices.extent(0) } , i1 { i1 } , i1_prev { i1_prev } @@ -282,6 +283,9 @@ namespace kernel::comm { } else { idx = outgoing_indices(p); } + if (idx >= maxnpart) { + raise::KernelError(HERE, "Received particle index exceeds the array size"); + } if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1(idx) = recv_buff_int(NINTS * p + 0); i1_prev(idx) = recv_buff_int(NINTS * p + 1); From 7388d8e7f805910b1ec53c2f829ecd7aeb9a4e9d Mon Sep 17 00:00:00 2001 From: haykh Date: Fri, 24 Jan 2025 16:28:48 -0500 Subject: [PATCH 093/124] pld 
comm --- src/framework/domain/comm_mpi.hpp | 57 ++++++++++++++++++++++++++----- src/kernels/comm.hpp | 45 +++++++++++++++++------- 2 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index f001738cf..dec321883 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -335,9 +335,7 @@ namespace comm { D == Dim::_2D and C != Coord::Cart); const unsigned short NINTS = 2 * static_cast(D); const unsigned short NPRTLDX = 2 * static_cast(D); - const unsigned short NPLD = species.npld(); - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const unsigned short NPLDS = species.npld(); // buffers to store recv data const auto npart_alive = npptag_vec[ParticleTag::alive]; @@ -350,6 +348,11 @@ namespace comm { array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; + array_t recv_buff_pld; + + if (NPLDS > 0) { + recv_buff_pld = array_t { "recv_buff_pld", npart_recv * NPLDS }; + } auto iteration = 0; auto current_received = 0; @@ -368,6 +371,10 @@ namespace comm { array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + array_t send_buff_pld; + if (NPLDS > 0) { + send_buff_pld = array_t { "send_buff_pld", npart_send_in * NPLDS }; + } auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); Kokkos::deep_copy(tag_offsets_h, tag_offsets); @@ -381,13 +388,13 @@ namespace comm { "PopulateSendBuffer", npart_send_in, kernel::comm::PopulatePrtlSendBuffer_kernel( - send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, idx_offset, + send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, idx_offset, species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, 
species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, + species.weight, species.phi, species.pld, species.tag, outgoing_indices) ); // clang-format on @@ -395,6 +402,7 @@ namespace comm { const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; const auto recv_offset_prtldx = current_received * NPRTLDX; + const auto recv_offset_pld = current_received * NPLDS; if ((send_rank >= 0) and (recv_rank >= 0) and (npart_send_in > 0) and (npart_recv_in > 0)) { @@ -438,6 +446,20 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Sendrecv(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } } else if ((send_rank >= 0) and (npart_send_in > 0)) { MPI_Send(send_buff_int.data(), npart_send_in * NINTS, @@ -457,6 +479,14 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + if (NPLDS > 0) { + MPI_Send(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > recv_buff_int.extent(0), @@ -483,6 +513,15 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Recv(recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } } current_received += npart_recv_in; iteration++; @@ -494,14 +533,14 @@ namespace comm { "PopulateFromRecvBuffer", npart_recv, kernel::comm::ExtractReceivedPrtls_kernel( - recv_buff_int, recv_buff_real, recv_buff_prtldx, - NINTS, NREALS, NPRTLDX, + recv_buff_int, recv_buff_real, recv_buff_prtldx, 
recv_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, species.npart(), species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, + species.weight, species.phi, species.pld, species.tag, outgoing_indices) ); // clang-format on diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index a24ee897e..e8fe96fb0 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -111,13 +111,15 @@ namespace kernel::comm { array_t send_buff_int; array_t send_buff_real; array_t send_buff_prtldx; + array_t send_buff_pld; - const unsigned short NINTS, NREALS, NPRTLDX; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; const std::size_t idx_offset; const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; - const array_t ux1, ux2, ux3, weight, phi; const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t pld; array_t tag; const array_t outgoing_indices; @@ -125,9 +127,11 @@ namespace kernel::comm { PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, array_t& send_buff_real, array_t& send_buff_prtldx, + array_t& send_buff_pld, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, + unsigned short NPLDS, std::size_t idx_offset, const array_t& i1, const array_t& i1_prev, @@ -146,25 +150,28 @@ namespace kernel::comm { const array_t& ux3, const array_t& weight, const array_t& phi, + const array_t& pld, array_t& tag, const array_t& outgoing_indices) : send_buff_int { send_buff_int } , send_buff_real { send_buff_real } , send_buff_prtldx { send_buff_prtldx } + , send_buff_pld { send_buff_pld } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } , idx_offset { idx_offset } , i1 { i1 } , i1_prev { i1_prev } - , dx1 { dx1 } - , dx1_prev { dx1_prev } , i2 { i2 } , i2_prev { i2_prev } - , 
dx2 { dx2 } - , dx2_prev { dx2_prev } , i3 { i3 } , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } , dx3 { dx3 } , dx3_prev { dx3_prev } , ux1 { ux1 } @@ -172,6 +179,7 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } + , pld { pld } , tag { tag } , outgoing_indices { outgoing_indices } {} @@ -202,6 +210,11 @@ namespace kernel::comm { if constexpr (D == Dim::_2D and C != Coord::Cart) { send_buff_real(NREALS * p + 4) = phi(idx); } + if (NPLD > 0) { + for (auto l { 0u }; l < NPLD; ++l) { + send_buff_pld(NPLDS * p + l) = pld(idx, l); + } + } tag(idx) = ParticleTag::dead; } }; @@ -211,13 +224,15 @@ namespace kernel::comm { const array_t recv_buff_int; const array_t recv_buff_real; const array_t recv_buff_prtldx; + const array_t recv_buff_pld; - const unsigned short NINTS, NREALS, NPRTLDX; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; const std::size_t npart, npart_holes; array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; - array_t ux1, ux2, ux3, weight, phi; array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t pld; array_t tag; const array_t outgoing_indices; @@ -225,9 +240,11 @@ namespace kernel::comm { ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, const array_t& recv_buff_real, const array_t& recv_buff_prtldx, + const array_t& recv_buff_pld, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, + unsigned short NPLDS, std::size_t npart, array_t& i1, array_t& i1_prev, @@ -246,26 +263,29 @@ namespace kernel::comm { array_t& ux3, array_t& weight, array_t& phi, + array_t& pld, array_t& tag, const array_t& outgoing_indices) : recv_buff_int { recv_buff_int } , recv_buff_real { recv_buff_real } , recv_buff_prtldx { recv_buff_prtldx } + , recv_buff_pld { recv_buff_pld } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } , npart { npart } , npart_holes { outgoing_indices.extent(0) } , i1 
{ i1 } , i1_prev { i1_prev } - , dx1 { dx1 } - , dx1_prev { dx1_prev } , i2 { i2 } , i2_prev { i2_prev } - , dx2 { dx2 } - , dx2_prev { dx2_prev } , i3 { i3 } , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } , dx3 { dx3 } , dx3_prev { dx3_prev } , ux1 { ux1 } @@ -273,6 +293,7 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } + , pld { pld } , tag { tag } {} Inline void operator()(index_t p) const { From 6aa7a8099d30e0e950d3f026d18578d4da71c728 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 28 Jan 2025 15:15:17 -0500 Subject: [PATCH 094/124] minor bug fixed in kernel --- src/kernels/comm.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index d33e2e006..b280ce38b 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -292,7 +292,8 @@ namespace kernel::comm { , weight { weight } , phi { phi } , pld { pld } - , tag { tag } {} + , tag { tag } + , outgoing_indices { outgoing_indices } {} Inline void operator()(index_t p) const { std::size_t idx; From 3071251053b08a42ee852f1e98a6553ad5030dc5 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:20:47 -0500 Subject: [PATCH 095/124] nix devshell --- dev/nix/adios2.nix | 69 ++++++++++++++++++++++++++++++---------------- dev/nix/kokkos.nix | 64 ++++++++++++++++++++++++++++++++++++++++++ dev/nix/shell.nix | 54 ++++++++++++++++-------------------- 3 files changed, 133 insertions(+), 54 deletions(-) create mode 100644 dev/nix/kokkos.nix diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index 19c706aa4..f2cd4ca43 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -7,6 +7,20 @@ let name = "adios2"; version = "2.10.2"; + cmakeFlags = { + CMAKE_CXX_STANDARD = "17"; + CMAKE_CXX_EXTENSIONS = "OFF"; + CMAKE_POSITION_INDEPENDENT_CODE = "TRUE"; + BUILD_SHARED_LIBS = "ON"; + ADIOS2_USE_HDF5 = if hdf5 then "ON" else "OFF"; + ADIOS2_USE_Python = "OFF"; + 
ADIOS2_USE_Fortran = "OFF"; + ADIOS2_USE_ZeroMQ = "OFF"; + BUILD_TESTING = "OFF"; + ADIOS2_BUILD_EXAMPLES = "OFF"; + ADIOS2_USE_MPI = if mpi then "ON" else "OFF"; + CMAKE_BUILD_TYPE = "Release"; + } // (if !mpi then { ADIOS2_HAVE_HDF5_VOL = "OFF"; } else { }); in pkgs.stdenv.mkDerivation { pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; @@ -17,35 +31,44 @@ pkgs.stdenv.mkDerivation { sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; }; - nativeBuildInputs = - with pkgs; + nativeBuildInputs = with pkgs; [ + cmake + perl + ]; + + propagatedBuildInputs = [ - cmake - libgcc - perl - breakpointHook + pkgs.gcc13 ] - ++ (if mpi then [ openmpi ] else [ ]); - - buildInputs = if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]; + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]) + ++ (if mpi then [ pkgs.openmpi ] else [ ]); configurePhase = '' - cmake -B build $src \ - -D CMAKE_CXX_STANDARD=17 \ - -D CMAKE_CXX_EXTENSIONS=OFF \ - -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ - -D BUILD_SHARED_LIBS=ON \ - -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ - -D ADIOS2_USE_Python=OFF \ - -D ADIOS2_USE_Fortran=OFF \ - -D ADIOS2_USE_ZeroMQ=OFF \ - -D BUILD_TESTING=OFF \ - -D ADIOS2_BUILD_EXAMPLES=OFF \ - -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ - -D ADIOS2_HAVE_HDF5_VOL=OFF \ - -D CMAKE_BUILD_TYPE=Release + cmake -B build $src ${ + pkgs.lib.attrsets.foldlAttrs ( + acc: key: value: + acc + " -D ${key}=${value}" + ) "" cmakeFlags + } ''; + # configurePhase = + # '' + # cmake -B build $src \ + # -D CMAKE_CXX_STANDARD=17 \ + # -D CMAKE_CXX_EXTENSIONS=OFF \ + # -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + # -D BUILD_SHARED_LIBS=ON \ + # -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ + # -D ADIOS2_USE_Python=OFF \ + # -D ADIOS2_USE_Fortran=OFF \ + # -D ADIOS2_USE_ZeroMQ=OFF \ + # -D BUILD_TESTING=OFF \ + # -D ADIOS2_BUILD_EXAMPLES=OFF \ + # -D ADIOS2_USE_MPI=${if mpi then "ON" else 
"OFF"} \ + # -D CMAKE_BUILD_TYPE=Release + # '' + buildPhase = '' cmake --build build -j ''; diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix new file mode 100644 index 000000000..cfe583c7a --- /dev/null +++ b/dev/nix/kokkos.nix @@ -0,0 +1,64 @@ +{ + pkgs ? import { }, + arch ? "native", + gpu ? "none", +}: + +let + gpuUpper = pkgs.lib.toUpper gpu; + name = "kokkos"; + version = "4.5.01"; + compilerPkgs = { + "HIP" = with pkgs.rocmPackages; [ + rocm-core + clr + rocthrust + rocprim + rocminfo + rocm-smi + ]; + "NONE" = [ + pkgs.gcc13 + ]; + }; + cmakeFlags = { + "HIP" = [ + "-D CMAKE_C_COMPILER=hipcc" + "-D CMAKE_CXX_COMPILER=hipcc" + ]; + "NONE" = [ ]; + }; + getArch = + _: + if gpu != "none" && arch == "native" then + throw "Please specify an architecture when the GPU support is enabled. Available architectures: https://kokkos.org/kokkos-core-wiki/keywords.html#architectures" + else + pkgs.lib.toUpper arch; + +in +pkgs.stdenv.mkDerivation { + pname = "${name}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/kokkos/kokkos/"; + rev = "v${version}"; + sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + ]; + + propagatedBuildInputs = compilerPkgs.${gpuUpper}; + + cmakeFlags = [ + "-D CMAKE_CXX_STANDARD=17" + "-D CMAKE_CXX_EXTENSIONS=OFF" + "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" + "-D Kokkos_ARCH_${getArch { }}=ON" + (if gpu != "none" then "-D Kokkos_ENABLE_${gpuUpper}=ON" else "") + "-D CMAKE_BUILD_TYPE=Release" + ] ++ cmakeFlags.${gpuUpper}; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index f9d48fbfd..219da0038 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -2,40 +2,36 @@ pkgs ? import { }, mpi ? false, hdf5 ? false, + gpu ? "none", + arch ? 
"native", }: let name = "entity-dev"; - compilerPkg = pkgs.gcc13; - compilerCXX = "g++"; - compilerCC = "gcc"; adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); + kokkosPkg = (pkgs.callPackage ./kokkos.nix { inherit pkgs arch gpu; }); in pkgs.mkShell { name = "${name}-env"; - nativeBuildInputs = - with pkgs; - [ - zlib - cmake - - compilerPkg - - clang-tools - - adios2Pkg - python312 - python312Packages.jupyter - - cmake-format - neocmakelsp - black - pyright - taplo - vscode-langservers-extracted - ] - ++ (if mpi then [ pkgs.openmpi ] else [ ]) - ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); + nativeBuildInputs = with pkgs; [ + zlib + cmake + + clang-tools + + adios2Pkg + kokkosPkg + + python312 + python312Packages.jupyter + + cmake-format + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ]; LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ pkgs.stdenv.cc.cc @@ -45,12 +41,8 @@ pkgs.mkShell { shellHook = '' BLUE='\033[0;34m' NC='\033[0m' - export CC=$(which ${compilerCC}) - export CXX=$(which ${compilerCXX}) - export CMAKE_CXX_COMPILER=$(which ${compilerCXX}) - export CMAKE_C_COMPILER=$(which ${compilerCC}) echo "" - echo -e "${name} nix-shell activated: ''\${BLUE}$(which ${compilerCXX})''\${NC}" + echo -e "${name} nix-shell activated" ''; } From 6aa045a4947854913112858fdfed36ca7100a144 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:51:45 -0500 Subject: [PATCH 096/124] pld reading in checkpoint fixed --- src/checkpoint/reader.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index 208972561..9fc2d2640 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -98,7 +98,7 @@ namespace checkpoint { fmt::format("s%d_%s", s + 1, quantity.c_str())); if (var) { var.SetSelection(adios2::Box({ offset }, { count })); - const auto slice = std::pair { 0, count }; + const auto slice = 
range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice).data(), adios2::Mode::Sync); Kokkos::deep_copy(Kokkos::subview(array, slice), @@ -121,13 +121,12 @@ namespace checkpoint { auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); if (var) { var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); - const auto slice = std::pair { 0, count }; + const auto slice = range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice, range_tuple_t(0, nplds)).data(), adios2::Mode::Sync); - Kokkos::deep_copy(Kokkos::subview(array, slice, range_tuple_t(0, nplds)), - Kokkos::subview(array_h, slice, range_tuple_t(0, nplds))); + Kokkos::deep_copy(array, array_h); } else { raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); } From 649e3285796f107a2699c38e3c740cf1c998d895 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:54:43 -0500 Subject: [PATCH 097/124] clean benchmark --- benchmark/benchmark.cpp | 274 ++-------------------------------------- legacy/benchmark.cpp | 273 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 265 deletions(-) create mode 100644 legacy/benchmark.cpp diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 54fc17cf9..98306c92b 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,273 +1,17 @@ -#include "enums.h" #include "global.h" -#include "utils/error.h" - -#include "metrics/metric_base.h" -#include "metrics/minkowski.h" - -#include "framework/containers/species.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -#include - -#include "framework/domain/communications.cpp" -#include "mpi.h" -#include "mpi-ext.h" - -#define TIMER_START(label) \ - Kokkos::fence(); \ - auto start_##label = std::chrono::high_resolution_clock::now(); - -#define TIMER_STOP(label) \ - Kokkos::fence(); \ - auto 
stop_##label = std::chrono::high_resolution_clock::now(); \ - auto duration_##label = std::chrono::duration_cast( \ - stop_##label - start_##label) \ - .count(); \ - std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ - << std::endl; - -/* - Test to check the performance of the new particle allocation scheme - - Create a metadomain object main() - - Set npart + initialize tags InitializeParticleArrays() - - 'Push' the particles by randomly updating the tags PushParticles() - - Communicate particles to neighbors and time the communication - - Compute the time taken for best of N iterations for the communication - */ -using namespace ntt; - -// Set npart and set the particle tags to alive -template -void InitializeParticleArrays(Domain& domain, const int npart) { - raise::ErrorIf(npart > domain.species[0].maxnpart(), - "Npart cannot be greater than maxnpart", - HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_npart(npart); - domain.species[i_spec].SyncHostDevice(); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Initialize particles", - npart, - Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); - } - return; -} - -// Randomly reassign tags to particles for a fraction of particles -template -void PushParticles(Domain& domain, - const double send_frac, - const int seed_ind, - const int seed_tag) { - raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_unsorted(); - const auto nparticles = domain.species[i_spec].npart(); - const auto nparticles_to_send = static_cast(send_frac * nparticles); - // Generate random indices to send - // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); - Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); - 
Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); - // Generate random tags to send - // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); - Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); - Kokkos::fill_random(tags_to_send, - domain.random_pool, - 0, - domain.species[i_spec].ntags()); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Push particles", - nparticles_to_send, - Lambda(const std::size_t i) { - auto prtl_to_send = indices_to_send(i); - auto tag_to_send = tags_to_send(i); - this_tag(prtl_to_send) = tag_to_send; - }); - domain.species[i_spec].npart_per_tag(); - domain.species[i_spec].SyncHostDevice(); - } - return; -} +#include +#include auto main(int argc, char* argv[]) -> int { - GlobalInitialize(argc, argv); - { - /* - MPI checks - */ - printf("Compile time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); -#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library does not have CUDA-aware support.\n"); -#else - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ -printf("Run time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) - if (1 == MPIX_Query_cuda_support()) { - printf("This MPI library has CUDA-aware support.\n"); - } else { - printf("This MPI library does not have CUDA-aware support.\n"); - } -#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ - - /* - Test to send and receive Kokkos arrays - */ - int sender_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); - - int neighbor_rank = 0; - if (sender_rank == 0) { - neighbor_rank = 1; - } - else if (sender_rank == 1) { - neighbor_rank = 0; - } - else { - raise::Error("This test is only for 2 ranks", HERE); 
- } - Kokkos::View send_array("send_array", 10); - Kokkos::View recv_array("recv_array", 10); - if (sender_rank == 0) { - Kokkos::deep_copy(send_array, 10); - } - else { - Kokkos::deep_copy(send_array, 20); - } - - auto send_array_host = Kokkos::create_mirror_view(send_array); - Kokkos::deep_copy(send_array_host, send_array); - auto host_recv_array = Kokkos::create_mirror_view(recv_array); - - MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, - recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - // Print the received array - Kokkos::deep_copy(host_recv_array, recv_array); - for (int i = 0; i < 10; ++i) { - printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); - } - - - std::cout << "Constructing the domain" << std::endl; - // Create a Metadomain object - const unsigned int ndomains = 2; - const std::vector global_decomposition = { - {-1, -1, -1} - }; - const std::vector global_ncells = { 32, 32, 32 }; - const boundaries_t global_extent = { - {0.0, 3.0}, - {0.0, 3.0}, - {0.0, 3.0} - }; - const boundaries_t global_flds_bc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC} - }; - const boundaries_t global_prtl_bc = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} - }; - const std::map metric_params = {}; - const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; - const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::ParticleSpecies(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain>( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - { species }); - - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers { { "Communication" }, nullptr, false }; - InitializeParticleArrays(*local_domain, npart); - // Timers for both the communication routines - auto total_time_elapsed_old = 0; - auto total_time_elapsed_new = 0; - - int seed_ind = 0; - int seed_tag = 1; - Kokkos::fence(); - - for (int i = 0; i < 10; ++i) { - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast( - stop_new - start_new) - .count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - } - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast( - stop_old - start_old) - .count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); - } - } - printf("Total time elapsed for old: %f us : %f us/prtl\n", - 
total_time_elapsed_old / 10.0, - total_time_elapsed_old / 10.0 * 1000 / npart); - printf("Total time elapsed for new: %f us : %f us/prtl\n", - total_time_elapsed_new / 10.0, - total_time_elapsed_new / 10.0 * 1000 / npart); + ntt::GlobalInitialize(argc, argv); + try { + // ... + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + GlobalFinalize(); + return 1; } GlobalFinalize(); return 0; } - -/* - Buggy behavior: - Consider a single domain with a single mpi rank - Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain - CommunicateParticles() discounts all the dead particles and reassigns the - other tags to alive - CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles - and discounts the rest -*/ diff --git a/legacy/benchmark.cpp b/legacy/benchmark.cpp new file mode 100644 index 000000000..54fc17cf9 --- /dev/null +++ b/legacy/benchmark.cpp @@ -0,0 +1,273 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" + +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +#include + +#include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - 
Communicate particles to neighbors and time the communication + - Compute the time taken for best of N iterations for the communication + */ +using namespace ntt; + +// Set npart and set the particle tags to alive +template +void InitializeParticleArrays(Domain& domain, const int npart) { + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); + } + return; +} + +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); + // Generate random tags to send + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = 
indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); + } + return; +} + +auto main(int argc, char* argv[]) -> int { + GlobalInitialize(argc, argv); + { + /* + MPI checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), 
recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + + std::cout << "Constructing the domain" << std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); + + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); + + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + 
total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); + } + GlobalFinalize(); + return 0; +} + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ From 3f2674c23088ca344a9e61135f79815672d610ad Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 11:05:27 -0500 Subject: [PATCH 098/124] cleanup prep for release --- dev/nix/adios2.nix | 17 ---- setups/srpic/blob/blob.toml | 66 ------------- setups/srpic/blob/pgen.hpp | 103 -------------------- setups/{srpic => tests}/blob/blob.py | 0 setups/tests/blob/blob.toml | 76 +++++++-------- setups/{srpic => tests}/blob/nparts.py | 0 setups/tests/blob/pgen.hpp | 126 +++++++++++-------------- src/framework/containers/particles.cpp | 54 ----------- 8 files changed, 92 insertions(+), 350 deletions(-) delete mode 100644 setups/srpic/blob/blob.toml delete mode 100644 setups/srpic/blob/pgen.hpp rename setups/{srpic => tests}/blob/blob.py (100%) rename setups/{srpic => tests}/blob/nparts.py (100%) diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index f2cd4ca43..8ec1fd36c 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -52,23 +52,6 @@ pkgs.stdenv.mkDerivation { } ''; - # configurePhase = - # '' - # cmake -B build $src \ - # -D CMAKE_CXX_STANDARD=17 \ - # -D CMAKE_CXX_EXTENSIONS=OFF \ - # -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ - # -D BUILD_SHARED_LIBS=ON \ - # -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ - # -D ADIOS2_USE_Python=OFF \ - # -D ADIOS2_USE_Fortran=OFF \ - # -D ADIOS2_USE_ZeroMQ=OFF \ - # -D BUILD_TESTING=OFF \ - # -D 
ADIOS2_BUILD_EXAMPLES=OFF \ - # -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ - # -D CMAKE_BUILD_TYPE=Release - # '' - buildPhase = '' cmake --build build -j ''; diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml deleted file mode 100644 index 7a047f348..000000000 --- a/setups/srpic/blob/blob.toml +++ /dev/null @@ -1,66 +0,0 @@ -[simulation] - name = "blob" - engine = "srpic" - runtime = 100.0 - - [simulation.domain] - decomposition = [2, 1, 1] - -[grid] - resolution = [1024, 1024] - extent = [[-10.0, 10.0], [-10.0, 10.0]] - - [grid.metric] - metric = "minkowski" - - [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"]] - -[scales] - larmor0 = 1.0 - skindepth0 = 1.0 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 16.0 - - [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - - [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - -[setup] - temp_1 = 1e-4 - x1c = -5.0 - x2c = 0.0 - v_max = 50.0 - dr = 1.0 - -[output] - format = "hdf5" - interval_time = 1.0 - - [output.fields] - quantities = ["N_1", "N_2", "B", "E"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - colored_stdout = false diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp deleted file mode 100644 index f7b7d71b5..000000000 --- a/setups/srpic/blob/pgen.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct CounterstreamEnergyDist : public 
arch::EnergyDistribution { - CounterstreamEnergyDist(const M& metric, real_t v_max) - : arch::EnergyDistribution { metric } - , v_max { v_max } {} - - Inline void operator()(const coord_t& x_Ph, - vec_t& v, - unsigned short sp) const override { - v[0] = v_max; - } - - private: - const real_t v_max; - }; - - template - struct GaussianDist : public arch::SpatialDistribution { - GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) - : arch::SpatialDistribution { metric } - , x1c { x1c } - , x2c { x2c } - , dr { dr } {} - - // to properly scale the number density, the probability should be normalized to 1 - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { - return 1.0; - } else { - return 0.0; - } - } - - private: - const real_t x1c, x2c, dr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = traits::compatible_with::value; - static constexpr auto dimensions = - traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t temp_1, x1c, x2c, dr, v_max; - - inline PGen(const SimulationParams& p, const Metadomain& global_domain) - : arch::ProblemGenerator { p } - , temp_1 { p.template get("setup.temp_1") } - , x1c { p.template get("setup.x1c") } - , x2c { p.template get("setup.x2c") } - , v_max { p.template get("setup.v_max") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& local_domain) { - const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, - v_max); - const auto spatial_dist = GaussianDist(local_domain.mesh.metric, - x1c, - x2c, - dr); - const auto injector = - arch::NonUniformInjector( - 
energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( - params, - local_domain, - injector, - 1.0); - } - }; - -} // namespace user - -#endif diff --git a/setups/srpic/blob/blob.py b/setups/tests/blob/blob.py similarity index 100% rename from setups/srpic/blob/blob.py rename to setups/tests/blob/blob.py diff --git a/setups/tests/blob/blob.toml b/setups/tests/blob/blob.toml index fffa5fff1..7a047f348 100644 --- a/setups/tests/blob/blob.toml +++ b/setups/tests/blob/blob.toml @@ -1,32 +1,25 @@ [simulation] - name = "blob-1x1x2" - engine = "srpic" - runtime = 5.0 + name = "blob" + engine = "srpic" + runtime = 100.0 [simulation.domain] - decomposition = [1, 1, 2] + decomposition = [2, 1, 1] [grid] - resolution = [128, 192, 64] - # extent = [[1.0, 10.0]] - extent = [[-2.0, 2.0], [-3.0, 3.0], [-1.0, 1.0]] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] - # metric = "qspherical" metric = "minkowski" [grid.boundaries] - # fields = [["ATMOSPHERE", "ABSORB"]] - # particles = [["ATMOSPHERE", "ABSORB"]] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - - # [grid.boundaries.absorb] - # ds = 1.0 - + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + [scales] - larmor0 = 2e-5 - skindepth0 = 0.01 + larmor0 = 1.0 + skindepth0 = 1.0 [algorithms] current_filters = 4 @@ -35,32 +28,39 @@ CFL = 0.5 [particles] - ppc0 = 20.0 - # use_weights = true + ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] - xi_min = [0.55, 1.85, -0.25] - xi_max = [0.65, 2.3, -0.1] - v1 = [0.25, -0.55, 0.0] - v2 = [-0.75, -0.15, 0.0] - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 
+ v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" - interval_time = 0.02 + format = "hdf5" + interval_time = 1.0 [output.fields] - quantities = ["Nppc_1", "Nppc_2", "E", "B", "J"] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/srpic/blob/nparts.py b/setups/tests/blob/nparts.py similarity index 100% rename from setups/srpic/blob/nparts.py rename to setups/tests/blob/nparts.py diff --git a/setups/tests/blob/pgen.hpp b/setups/tests/blob/pgen.hpp index d07240bfd..f7b7d71b5 100644 --- a/setups/tests/blob/pgen.hpp +++ b/setups/tests/blob/pgen.hpp @@ -10,107 +10,89 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" -#include - namespace user { using namespace ntt; template - struct Beam : public arch::EnergyDistribution { - Beam(const M& metric, - const std::vector& v1_vec, - const std::vector& v2_vec) - : arch::EnergyDistribution { metric } { - std::copy(v1_vec.begin(), v1_vec.end(), v1); - std::copy(v2_vec.begin(), v2_vec.end(), v2); - } - - Inline void operator()(const coord_t&, - vec_t& v_Ph, - unsigned short sp) const override { - if (sp == 1) { - v_Ph[0] = v1[0]; - v_Ph[1] = v1[1]; - v_Ph[2] = v1[2]; - } else { - v_Ph[0] = v2[0]; - v_Ph[1] = v2[1]; - v_Ph[2] = v2[2]; - } + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; } private: - vec_t v1; - vec_t v2; + const real_t v_max; }; template - struct PointDistribution : public arch::SpatialDistribution { - PointDistribution(const M& metric, - const 
std::vector& xi_min, - const std::vector& xi_max) - : arch::SpatialDistribution { metric } { - std::copy(xi_min.begin(), xi_min.end(), x_min); - std::copy(xi_max.begin(), xi_max.end(), x_max); - } - + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - auto fill = true; - for (auto d = 0u; d < M::Dim; ++d) { - fill &= x_Ph[d] > x_min[d] and x_Ph[d] < x_max[d]; + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; } - return fill ? ONE : ZERO; } private: - tuple_t x_min; - tuple_t x_max; + const real_t x1c, x2c, dr; }; template struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { - traits::compatible_with::value - }; - static constexpr auto dimensions { - traits::compatible_with::value - }; + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; // for easy access to variables in the child class using arch::ProblemGenerator::D; using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const std::vector xi_min; - const std::vector xi_max; - const std::vector v1; - const std::vector v2; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , xi_min { p.template get>("setup.xi_min") } - , xi_max { p.template get>("setup.xi_max") } - , v1 { p.template get>("setup.v1") } - , v2 { p.template get>("setup.v2") } {} - - inline void InitPrtls(Domain& domain) { 
- const auto energy_dist = Beam(domain.mesh.metric, v1, v2); - const auto spatial_dist = PointDistribution(domain.mesh.metric, - xi_min, - xi_max); - const auto injector = arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( params, - domain, + local_domain, injector, 1.0); } diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 50b410270..d78055824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -235,60 +235,6 @@ namespace ntt { m_is_sorted = true; } - // template - // void Particles::PrintTags() { - // auto tag_h = Kokkos::create_mirror_view(tag); - // Kokkos::deep_copy(tag_h, tag); - // auto i1_h = Kokkos::create_mirror_view(i1); - // Kokkos::deep_copy(i1_h, i1); - // auto dx1_h = Kokkos::create_mirror_view(dx1); - // Kokkos::deep_copy(dx1_h, dx1); - // std::cout << "species " << label() << " [npart = " << npart() << "]" - // << std::endl; - // std::cout << "idxs: "; - // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << i << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (tag_h.extent(0) > 30) { - // std::cout << "... 
" << std::setw(3) << tag_h.extent(0) - 1; - // } - // std::cout << std::endl << "tags: "; - // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << (short)tag_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (tag_h.extent(0) > 30) { - // std::cout << "..." << std::setw(3) << (short)tag_h(tag_h.extent(0) - 1); - // } - // std::cout << std::endl << "i1s : "; - // for (auto i = 0; i < IMIN(i1_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << i1_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (i1_h.extent(0) > 30) { - // std::cout << "..." << std::setw(3) << i1_h(i1_h.extent(0) - 1); - // } - // std::cout << std::endl << "dx1s : "; - // for (auto i = 0; i < IMIN(dx1_h.extent(0), 30); ++i) { - // std::cout << std::setprecision(2) << std::setw(3) << dx1_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (dx1_h.extent(0) > 30) { - // std::cout << "..." 
<< std::setprecision(2) << std::setw(3) - // << dx1_h(dx1_h.extent(0) - 1); - // } - // std::cout << std::endl; - // } - template struct Particles; template struct Particles; template struct Particles; From 2b6ef3c5542c53225c60e5bd21585cc105fbca58 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 12:04:54 -0500 Subject: [PATCH 099/124] tested with/without mpi cpu+hip --- src/framework/tests/particles.cpp | 12 +++-- src/global/tests/kokkos_aliases.cpp | 6 +-- src/kernels/particle_pusher_sr.hpp | 70 ++++++++++++++--------------- src/kernels/tests/deposit.cpp | 40 ++++++----------- src/kernels/tests/prtl_bc.cpp | 18 ++++---- 5 files changed, 68 insertions(+), 78 deletions(-) diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index 535198286..6c4c227b5 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,8 +46,10 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); - raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + if (npld > 0) { + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); + } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { raise::ErrorIf(p.i2.extent(0) != maxnpart, "i2 incorrectly allocated", HERE); @@ -115,7 +117,8 @@ auto main(int argc, char** argv) -> int { 0.0, 100, PrtlPusher::PHOTON, - Cooling::NONE); + Cooling::NONE, + 5); testParticles(4, "e+", 1.0, @@ -129,7 +132,8 @@ auto main(int argc, char** argv) -> int { 1.0, 100, PrtlPusher::BORIS, - Cooling::NONE); + Cooling::NONE, + 1); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; Kokkos::finalize(); diff --git 
a/src/global/tests/kokkos_aliases.cpp b/src/global/tests/kokkos_aliases.cpp index 56a17c50f..909b6b30c 100644 --- a/src/global/tests/kokkos_aliases.cpp +++ b/src/global/tests/kokkos_aliases.cpp @@ -3,6 +3,7 @@ #include "global.h" #include +#include #include #include @@ -44,8 +45,7 @@ auto main(int argc, char* argv[]) -> int { { // scatter arrays & ranges array_t a { "a", 100 }; - scatter_array_t a_scatter = Kokkos::Experimental::create_scatter_view( - a); + auto a_scatter = Kokkos::Experimental::create_scatter_view(a); Kokkos::parallel_for( // range_t({ 0 }, { 100 }), CreateRangePolicy({ 0 }, { 100 }), @@ -87,4 +87,4 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index b4808f12a..2e8a5f652 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -227,41 +227,41 @@ namespace kernel::sr { const real_t coeff_sync; public: - Pusher_kernel(const PrtlPusher::type& pusher, - bool GCA, - bool ext_force, - CoolingTags cooling, - const ndfield_t& EB, - unsigned short sp, - array_t& i1, - array_t& i2, - array_t& i3, - array_t& i1_prev, - array_t& i2_prev, - array_t& i3_prev, - array_t& dx1, - array_t& dx2, - array_t& dx3, - array_t& dx1_prev, - array_t& dx2_prev, - array_t& dx3_prev, - array_t& ux1, - array_t& ux2, - array_t& ux3, - array_t& phi, - array_t& tag, - const M& metric, - const F& force, - real_t time, - real_t coeff, - real_t dt, - int ni1, - int ni2, - int ni3, - const boundaries_t& boundaries, - real_t gca_larmor_max, - real_t gca_eovrb_max, - real_t coeff_sync) + Pusher_kernel(const PrtlPusher::type& pusher, + bool GCA, + bool ext_force, + CoolingTags cooling, + const randacc_ndfield_t& EB, + unsigned short sp, + array_t& i1, + array_t& i2, + array_t& i3, + array_t& i1_prev, + array_t& i2_prev, + array_t& i3_prev, + array_t& dx1, + array_t& dx2, + array_t& dx3, + array_t& dx1_prev, 
+ array_t& dx2_prev, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& phi, + array_t& tag, + const M& metric, + const F& force, + real_t time, + real_t coeff, + real_t dt, + int ni1, + int ni2, + int ni3, + const boundaries_t& boundaries, + real_t gca_larmor_max, + real_t gca_eovrb_max, + real_t coeff_sync) : pusher { pusher } , GCA { GCA } , ext_force { ext_force } diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 9a8ae1cc6..ec364a313 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,8 +29,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -81,8 +80,6 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const int i0 = 4, j0 = 4; const prtldx_t dxi = 0.53, dxf = 0.47; @@ -122,30 +119,19 @@ void testDeposit(const std::vector& res, put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); - Kokkos::parallel_for("CurrentsDeposit", - 10, + auto J_scat = Kokkos::Experimental::create_scatter_view(J); + + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", 10, kernel::DepositCurrents_kernel(J_scat, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - weight, - tag, - metric, - charge, - inv_dt)); + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, weight, tag, + metric, charge, inv_dt)); + // clang-format on 
Kokkos::Experimental::contribute(J, J_scat); diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index c8f9eae04..14c1a9f54 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -201,9 +201,9 @@ void testPeriodicBC(const std::vector& res, // Particle boundaries auto boundaries = boundaries_t {}; boundaries = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } }; real_t time = ZERO; @@ -343,18 +343,18 @@ auto main(int argc, char* argv[]) -> int { const std::vector res1d { 50 }; const boundaries_t ext1d { - {0.0, 1000.0}, + { 0.0, 1000.0 }, }; const std::vector res2d { 30, 20 }; const boundaries_t ext2d { - {-15.0, 15.0}, - {-10.0, 10.0}, + { -15.0, 15.0 }, + { -10.0, 10.0 }, }; const std::vector res3d { 10, 10, 10 }; const boundaries_t ext3d { - {0.0, 1.0}, - {0.0, 1.0}, - {0.0, 1.0} + { 0.0, 1.0 }, + { 0.0, 1.0 }, + { 0.0, 1.0 } }; testPeriodicBC>(res1d, ext1d, {}); testPeriodicBC>(res2d, ext2d, {}); From 08bc485edcf98f0948fe92b0b60b0127799fdda1 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 29 Jan 2025 14:16:29 -0500 Subject: [PATCH 100/124] rm conflicting file --- src/engines/engine_step_report.cpp | 293 ----------------------------- 1 file changed, 293 deletions(-) delete mode 100644 src/engines/engine_step_report.cpp diff --git a/src/engines/engine_step_report.cpp b/src/engines/engine_step_report.cpp deleted file mode 100644 index 1681aabcc..000000000 --- a/src/engines/engine_step_report.cpp +++ /dev/null @@ -1,293 +0,0 @@ -#include "enums.h" -#include "global.h" - -#include "arch/mpi_aliases.h" -#include "utils/colors.h" -#include "utils/formatting.h" -#include "utils/progressbar.h" -#include "utils/timer.h" - -#include "metrics/kerr_schild.h" -#include "metrics/kerr_schild_0.h" -#include "metrics/minkowski.h" -#include 
"metrics/qkerr_schild.h" -#include "metrics/qspherical.h" -#include "metrics/spherical.h" - -#include "engines/engine.hpp" - -#include -#include - -namespace ntt { - namespace {} // namespace - - template - void print_particles(const Metadomain&, - unsigned short, - DiagFlags, - std::ostream& = std::cout); - - template - void Engine::print_step_report(timer::Timers& timers, - pbar::DurationHistory& time_history, - bool print_output, - bool print_sorting) const { - DiagFlags diag_flags = Diag::Default; - TimerFlags timer_flags = Timer::Default; - if (not m_params.get("diagnostics.colored_stdout")) { - diag_flags ^= Diag::Colorful; - timer_flags ^= Timer::Colorful; - } - if (m_params.get("particles.nspec") == 0) { - diag_flags ^= Diag::Species; - } - if (print_output) { - timer_flags |= Timer::PrintOutput; - } - if (print_sorting) { - timer_flags |= Timer::PrintSorting; - } - CallOnce( - [diag_flags](auto& time, auto& step, auto& max_steps, auto& dt) { - const auto c_bgreen = color::get_color("bgreen", - diag_flags & Diag::Colorful); - const auto c_bblack = color::get_color("bblack", - diag_flags & Diag::Colorful); - const auto c_reset = color::get_color("reset", diag_flags & Diag::Colorful); - std::cout << fmt::format("Step:%s %-8d%s %s[of %d]%s\n", - c_bgreen.c_str(), - step, - c_reset.c_str(), - c_bblack.c_str(), - max_steps, - c_reset.c_str()); - std::cout << fmt::format("Time:%s %-8.4f%s %s[Ξ”t = %.4f]%s\n", - c_bgreen.c_str(), - (double)time, - c_reset.c_str(), - c_bblack.c_str(), - (double)dt, - c_reset.c_str()) - << std::endl; - }, - time, - step, - max_steps, - dt); - if (diag_flags & Diag::Timers) { - timers.printAll(timer_flags, std::cout); - } - CallOnce([]() { - std::cout << std::endl; - }); - if (diag_flags & Diag::Species) { - CallOnce([diag_flags]() { - std::cout << color::get_color("bblack", diag_flags & Diag::Colorful); -#if defined(MPI_ENABLED) - std::cout << "Particle count:" << std::setw(22) << std::right << "[TOT]" - << std::setw(20) << 
std::right << "[MIN (%)]" << std::setw(20) - << std::right << "[MAX (%)]"; -#else - std::cout << "Particle count:" << std::setw(25) << std::right - << "[TOT (%)]"; -#endif - std::cout << color::get_color("reset", diag_flags & Diag::Colorful) - << std::endl; - }); - for (std::size_t sp { 0 }; sp < m_metadomain.species_params().size(); ++sp) { - print_particles(m_metadomain, sp, diag_flags, std::cout); - } - CallOnce([]() { - std::cout << std::endl; - }); - } - if (diag_flags & Diag::Progress) { - pbar::ProgressBar(time_history, step, max_steps, diag_flags, std::cout); - } - CallOnce([]() { - std::cout << std::setw(80) << std::setfill('.') << "" << std::endl - << std::endl; - }); - } - - template - void print_particles(const Metadomain& md, - unsigned short sp, - DiagFlags flags, - std::ostream& os) { - - static_assert(M::is_metric, "template arg for Engine class has to be a metric"); - std::size_t npart { 0 }; - std::size_t maxnpart { 0 }; - std::string species_label; - int species_index; - // sum npart & maxnpart over all subdomains on the current rank - md.runOnLocalDomainsConst( - [&npart, &maxnpart, &species_label, &species_index, sp](auto& dom) { - npart += dom.species[sp].npart(); - maxnpart += dom.species[sp].maxnpart(); - species_label = dom.species[sp].label(); - species_index = dom.species[sp].index(); - }); -#if defined(MPI_ENABLED) - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - std::vector mpi_npart(size, 0); - std::vector mpi_maxnpart(size, 0); - MPI_Gather(&npart, - 1, - mpi::get_type(), - mpi_npart.data(), - 1, - mpi::get_type(), - MPI_ROOT_RANK, - MPI_COMM_WORLD); - MPI_Gather(&maxnpart, - 1, - mpi::get_type(), - mpi_maxnpart.data(), - 1, - mpi::get_type(), - MPI_ROOT_RANK, - MPI_COMM_WORLD); - if (rank != MPI_ROOT_RANK) { - return; - } - auto tot_npart = std::accumulate(mpi_npart.begin(), mpi_npart.end(), 0); - std::size_t npart_max = *std::max_element(mpi_npart.begin(), mpi_npart.end()); - 
std::size_t npart_min = *std::min_element(mpi_npart.begin(), mpi_npart.end()); - std::vector mpi_load(size, 0.0); - for (auto r { 0 }; r < size; ++r) { - mpi_load[r] = 100.0 * (double)(mpi_npart[r]) / (double)(mpi_maxnpart[r]); - } - double load_max = *std::max_element(mpi_load.begin(), mpi_load.end()); - double load_min = *std::min_element(mpi_load.begin(), mpi_load.end()); - auto npart_min_str = npart_min > 9999 - ? fmt::format("%.2Le", (long double)npart_min) - : std::to_string(npart_min); - auto tot_npart_str = tot_npart > 9999 - ? fmt::format("%.2Le", (long double)tot_npart) - : std::to_string(tot_npart); - auto npart_max_str = npart_max > 9999 - ? fmt::format("%.2Le", (long double)npart_max) - : std::to_string(npart_max); - os << " species " << fmt::format("%2d", species_index) << " (" - << species_label << ")"; - - const auto c_bblack = color::get_color("bblack", flags & Diag::Colorful); - const auto c_red = color::get_color("red", flags & Diag::Colorful); - const auto c_yellow = color::get_color("yellow", flags & Diag::Colorful); - const auto c_green = color::get_color("green", flags & Diag::Colorful); - const auto c_reset = color::get_color("reset", flags & Diag::Colorful); - auto c_loadmin = (load_min > 80) ? c_red - : ((load_min > 50) ? c_yellow : c_green); - auto c_loadmax = (load_max > 80) ? c_red - : ((load_max > 50) ? 
c_yellow : c_green); - const auto raw1 = fmt::format("%s (%4.1f%%)", npart_min_str.c_str(), load_min); - const auto raw2 = fmt::format("%s (%4.1f%%)", npart_max_str.c_str(), load_max); - os << c_bblack - << fmt::pad(tot_npart_str, 20, '.', false).substr(0, 20 - tot_npart_str.size()) - << c_reset << tot_npart_str; - os << fmt::pad(raw1, 20, ' ', false).substr(0, 20 - raw1.size()) - << fmt::format("%s (%s%4.1f%%%s)", - npart_min_str.c_str(), - c_loadmin.c_str(), - load_min, - c_reset.c_str()); - os << fmt::pad(raw2, 20, ' ', false).substr(0, 20 - raw2.size()) - << fmt::format("%s (%s%4.1f%%%s)", - npart_max_str.c_str(), - c_loadmax.c_str(), - load_max, - c_reset.c_str()); -#else // not MPI_ENABLED - auto load = 100.0 * (double)(npart) / (double)(maxnpart); - auto npart_str = npart > 9999 ? fmt::format("%.2Le", (long double)npart) - : std::to_string(npart); - const auto c_bblack = color::get_color("bblack", flags & Diag::Colorful); - const auto c_red = color::get_color("red", flags & Diag::Colorful); - const auto c_yellow = color::get_color("yellow", flags & Diag::Colorful); - const auto c_green = color::get_color("green", flags & Diag::Colorful); - const auto c_reset = color::get_color("reset", flags & Diag::Colorful); - const auto c_load = (load > 80) - ? c_red.c_str() - : ((load > 50) ? 
c_yellow.c_str() : c_green.c_str()); - os << " species " << species_index << " (" << species_label << ")"; - const auto raw = fmt::format("%s (%4.1f%%)", npart_str.c_str(), load); - os << c_bblack << fmt::pad(raw, 24, '.').substr(0, 24 - raw.size()) << c_reset; - os << fmt::format("%s (%s%4.1f%%%s)", - npart_str.c_str(), - c_load, - load, - c_reset.c_str()); -#endif - os << std::endl; - } - - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; - template void Engine>::print_step_report(timer::Timers&,pbar::DurationHistory&,bool,bool) const; -} // namespace ntt - -// template -// auto Simulation::PrintDiagnostics(const std::size_t& step, -// const real_t& time, -// const timer::Timers& timers, -// std::vector& tstep_durations, -// const DiagFlags diag_flags, -// std::ostream& os) -> void { -// if (tstep_durations.size() > m_params.diagMaxnForPbar()) { -// tstep_durations.erase(tstep_durations.begin()); -// } -// tstep_durations.push_back(timers.get("Total")); -// if (step % m_params.diagInterval() == 0) { -// auto& mblock = this->meshblock; -// const auto title { -// fmt::format("Time = %f : step = %d : Ξ”t = %f", time, step, mblock.timestep()) -// }; -// PrintOnce( -// [](std::ostream& os, std::string title) { -// os << title << std::endl; -// }, -// os, -// title); -// if (diag_flags & DiagFlags_Timers) { -// timers.printAll("", 
timer::TimerFlags_Default, os); -// } -// if (diag_flags & DiagFlags_Species) { -// auto header = fmt::format("%s %27s", "[SPECIES]", "[TOT]"); -// #if defined(MPI_ENABLED) -// header += fmt::format("%17s %s", "[MIN (%) :", "MAX (%)]"); -// #endif -// PrintOnce( -// [](std::ostream& os, std::string header) { -// os << header << std::endl; -// }, -// os, -// header); -// for (const auto& species : meshblock.particles) { -// species.PrintParticleCounts(os); -// } -// } -// if (diag_flags & DiagFlags_Progress) { -// PrintOnce( -// [](std::ostream& os) { -// os << std::setw(65) << std::setfill('-') << "" << std::endl; -// }, -// os); -// ProgressBar(tstep_durations, time, m_params.totalRuntime(), os); -// } -// PrintOnce( -// [](std::ostream& os) { -// os << std::setw(65) << std::setfill('=') << "" << std::endl; -// }, -// os); -// } -// } From c9b4591f03601c2e8bf472e68435d7eacc59e063 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 14:26:20 -0500 Subject: [PATCH 101/124] nix-shell upd --- dev/nix/shell.nix | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 dev/nix/shell.nix diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix new file mode 100644 index 000000000..1f21e82b0 --- /dev/null +++ b/dev/nix/shell.nix @@ -0,0 +1,49 @@ +{ + pkgs ? import { }, + mpi ? false, + hdf5 ? false, + gpu ? "none", + arch ? 
"native", +}: + +let + name = "entity-dev"; + adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); + kokkosPkg = (pkgs.callPackage ./kokkos.nix { inherit pkgs arch gpu; }); +in +pkgs.mkShell { + name = "${name}-env"; + nativeBuildInputs = with pkgs; [ + zlib + cmake + + clang-tools + + adios2Pkg + kokkosPkg + + python312 + python312Packages.jupyter + + cmake-format + cmake-lint + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ]; + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ + pkgs.stdenv.cc.cc + pkgs.zlib + ]); + + shellHook = '' + BLUE='\033[0;34m' + NC='\033[0m' + + echo "" + echo -e "${name} nix-shell activated" + ''; +} From c89ea772902c0c427774250001f4da4a81892c88 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 14:37:07 -0500 Subject: [PATCH 102/124] kokkos v4.5.01 fixed --- extern/Kokkos | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/Kokkos b/extern/Kokkos index 5fc08a9a7..175257a51 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 5fc08a9a7da14d8530f8c7035d008ef63ddb4e5c +Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 From e1d7d522ad22e00175721332d85b6a1f63689d69 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 14:37:45 -0500 Subject: [PATCH 103/124] adios v2.10.2 fixed --- extern/adios2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/adios2 b/extern/adios2 index a6e8314cc..a19dad6ce 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit a6e8314cc3c0b28d496b44dcd4f15685013b887b +Subproject commit a19dad6cecb00319825f20fd9f455ebbab903d34 From 3e6a01e83bb370e006f9feacbe344618ed5f32d7 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 14:38:32 -0500 Subject: [PATCH 104/124] plog v1.1.10 fixed --- extern/plog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/plog b/extern/plog index 85a871b13..e21baecd4 160000 --- a/extern/plog +++ b/extern/plog @@ 
-1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit e21baecd4753f14da64ede979c5a19302618b752 From e99bf5d04087acea3807757a713189c5246e4a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 13 Nov 2024 14:25:01 -0600 Subject: [PATCH 105/124] fix bug in Bfield setup --- setups/srpic/shock/pgen.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 1eedb3a01..55dffb5d9 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -28,7 +28,7 @@ namespace user { @param drift_ux: drift velocity in the x direction */ InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) - : Bmag { bmag * static_cast(convert::deg2rad) } + : Bmag { bmag } , Btheta { btheta * static_cast(convert::deg2rad) } , Bphi { bphi * static_cast(convert::deg2rad) } , Vx { drift_ux } {} From 4374f94f6e0bc684f3f7ac63328cfdbb29461135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 13 Nov 2024 14:56:50 -0600 Subject: [PATCH 106/124] switch BCs to be consistent with Tristan --- setups/srpic/shock/shock.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index c8a6f7c9d..4ed3a2b9e 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -11,7 +11,7 @@ metric = "minkowski" [grid.boundaries] - fields = [["FIXED", "ABSORB"], ["PERIODIC"]] + fields = [["ABSORB", "FIXED"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] [scales] From 2fbf906457e6e8e2bde54ff6ed7116d277581067 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 14:48:25 -0500 Subject: [PATCH 107/124] rebase --- input.example.toml | 20 +- setups/srpic/shock/pgen.hpp | 19 +- setups/wip/magpump/pgen.hpp | 170 +++++++++ src/engines/srpic.hpp | 186 +++++---- src/framework/parameters.cpp | 50 ++- src/global/arch/traits.h | 19 +- 
src/global/defaults.h | 6 +- src/global/enums.h | 8 +- src/global/global.h | 13 + src/global/tests/enums.cpp | 2 +- src/kernels/fields_bcs.hpp | 715 +++++++++++++++++++++++++---------- 11 files changed, 889 insertions(+), 319 deletions(-) create mode 100644 setups/wip/magpump/pgen.hpp diff --git a/input.example.toml b/input.example.toml index c5622d65a..3f367995a 100644 --- a/input.example.toml +++ b/input.example.toml @@ -90,11 +90,11 @@ # Boundary conditions for fields: # @required # @type: 1/2/3-size array of string tuples, each of size 1 or 2 - # @valid: "PERIODIC", "ABSORB", "FIXED", "ATMOSPHERE", "CUSTOM", "HORIZON" - # @example: [["CUSTOM", "ABSORB"]] (for 2D spherical [[rmin, rmax]]) + # @valid: "PERIODIC", "MATCH", "FIXED", "ATMOSPHERE", "CUSTOM", "HORIZON" + # @example: [["CUSTOM", "MATCH"]] (for 2D spherical [[rmin, rmax]]) # @note: When periodic in any of the directions, you should only set one value: [..., ["PERIODIC"], ...] - # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "ABSORB"]] - # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] + # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "match"]] + # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["match"]] fields = "" # Boundary conditions for fields: # @required @@ -106,8 +106,8 @@ # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] particles = "" - [grid.boundaries.absorb] - # Size of the absorption layer in physical (code) units: + [grid.boundaries.match] + # Size of the matching layer for fields in physical (code) units: # @type: float # @default: 1% of the domain size (in shortest dimension) # @note: In spherical, this is the size of the layer in r from the outer wall @@ -118,6 +118,14 @@ # @default: 1.0 coeff = "" + 
[grid.boundaries.absorb] + # Size of the absorption layer for particles in physical (code) units: + # @type: float + # @default: 1% of the domain size (in shortest dimension) + # @note: In spherical, this is the size of the layer in r from the outer wall + # @note: In cartesian, this is the same for all dimensions where applicable + ds = "" + [grid.boundaries.atmosphere] # @required: if ATMOSPHERE is one of the boundaries # Temperature of the atmosphere in units of m0 c^2 diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index 55dffb5d9..b8f169521 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -13,6 +13,8 @@ #include "archetypes/problem_generator.h" #include "framework/domain/metadomain.h" +#include + namespace user { using namespace ntt; @@ -93,20 +95,21 @@ namespace user { inline PGen() {} - auto FixField(const em& comp) const -> real_t { + auto FixFieldsConst(const bc_in&, const em& comp) const + -> std::pair { if (comp == em::ex2) { - return init_flds.ex2({ ZERO }); + return { init_flds.ex2({ ZERO }), true }; } else if (comp == em::ex3) { - return init_flds.ex3({ ZERO }); - } else if (comp == em::bx1) { - return init_flds.bx1({ ZERO }); + return { init_flds.ex3({ ZERO }), true }; } else { - raise::Error("Other components should not be requested when BC is in X", - HERE); - return ZERO; + return { ZERO, false }; } } + auto MatchFields(real_t time) const -> InitFields { + return init_flds; + } + inline void InitPrtls(Domain& local_domain) { const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, diff --git a/setups/wip/magpump/pgen.hpp b/setups/wip/magpump/pgen.hpp new file mode 100644 index 000000000..21d4c8882 --- /dev/null +++ b/setups/wip/magpump/pgen.hpp @@ -0,0 +1,170 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/traits.h" + +#include "archetypes/particle_injector.h" +#include 
"archetypes/problem_generator.h" +#include "framework/domain/metadomain.h" + +#include + +namespace user { + using namespace ntt; + + template + struct InitFields { + InitFields(real_t bsurf, real_t rstar) : Bsurf { bsurf }, Rstar { rstar } {} + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return Bsurf * math::cos(x_Ph[1]) / CUBE(x_Ph[0] / Rstar); + } + + Inline auto bx2(const coord_t& x_Ph) const -> real_t { + return Bsurf * HALF * math::sin(x_Ph[1]) / CUBE(x_Ph[0] / Rstar); + } + + private: + const real_t Bsurf, Rstar; + }; + + template + struct DriveFields : public InitFields { + DriveFields(real_t time, real_t bsurf, real_t rstar) + : InitFields { bsurf, rstar } + , time { time } {} + + using InitFields::bx1; + using InitFields::bx2; + + Inline auto bx3(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex1(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex2(const coord_t& x_Ph) const -> real_t { + return ZERO; + } + + Inline auto ex3(const coord_t&) const -> real_t { + return ZERO; + } + + private: + const real_t time; + }; + + template + struct Inflow : public arch::EnergyDistribution { + Inflow(const M& metric, real_t vin) + : arch::EnergyDistribution { metric } + , vin { vin } {} + + Inline void operator()(const coord_t&, + vec_t& v_Ph, + unsigned short) const override { + v_Ph[0] = -vin; + } + + private: + const real_t vin; + }; + + template + struct Sphere : public arch::SpatialDistribution { + Sphere(const M& metric, real_t r0, real_t dr) + : arch::SpatialDistribution { metric } + , r0 { r0 } + , dr { dr } {} + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + return math::exp(-SQR((x_Ph[0] - r0) / dr)) * + (x_Ph[1] > 0.25 && x_Ph[1] < constant::PI - 0.25); + } + + private: + const real_t r0, dr; + }; + + template + struct PGen : public arch::ProblemGenerator { + static constexpr auto engines { traits::compatible_with::value }; + static constexpr auto metrics { + 
traits::compatible_with::value + }; + static constexpr auto dimensions { traits::compatible_with::value }; + + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t Bsurf, pump_period, pump_ampl, pump_radius, Rstar; + const real_t vin, drinj; + InitFields init_flds; + + inline PGen(const SimulationParams& p, const Metadomain& m) + : arch::ProblemGenerator { p } + , Bsurf { p.template get("setup.Bsurf", ONE) } + , pump_period { p.template get("setup.pump_period") } + , pump_ampl { p.template get("setup.pump_ampl") } + , pump_radius { p.template get("setup.pump_radius") } + , Rstar { m.mesh().extent(in::x1).first } + , vin { p.template get("setup.vin") } + , drinj { p.template get("setup.drinj") } + , init_flds { Bsurf, Rstar } {} + + auto FieldDriver(real_t time) const -> DriveFields { + return DriveFields { time, Bsurf, Rstar }; + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const real_t radius = pump_radius + + pump_ampl * + math::sin(time * constant::TWO_PI / pump_period); + const real_t dr = 1.0; + const auto& metric = domain.mesh.metric; + auto EM = domain.fields.em; + Kokkos::parallel_for( + "outerBC", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + const auto i1_ = COORD(i1), i2_ = COORD(i2); + const auto r = metric.template convert<1, Crd::Cd, Crd::Ph>(i1_); + if (r > radius - 5 * dr) { + const auto smooth = HALF * (ONE - math::tanh((r - radius) / dr)); + EM(i1, i2, em::ex1) = smooth * EM(i1, i2, em::ex1); + EM(i1, i2, em::ex2) = smooth * EM(i1, i2, em::ex2); + EM(i1, i2, em::ex3) = smooth * EM(i1, i2, em::ex3); + EM(i1, i2, em::bx1) = smooth * EM(i1, i2, em::bx1); + EM(i1, i2, em::bx2) = smooth * EM(i1, i2, em::bx2); + EM(i1, i2, em::bx3) = smooth * EM(i1, i2, em::bx3); + } + }); + + if (time < pump_period * 0.25) { + const auto energy_dist = Inflow(domain.mesh.metric, vin); + const auto spatial_dist = Sphere(domain.mesh.metric, 
radius, drinj); + const auto injector = arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( + params, + domain, + injector, + ONE, + true); + } + } + }; + +} // namespace user + +#endif diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index d8cb1e64c..c44f7641c 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -584,8 +584,8 @@ namespace ntt { void FieldBoundaries(domain_t& domain, BCTags tags) { for (auto& direction : dir::Directions::orth) { - if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::ABSORB) { - AbsorbFieldsIn(direction, domain, tags); + if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::MATCH) { + MatchFieldsIn(direction, domain, tags); } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::AXIS) { if (domain.mesh.flds_bc_in(direction) == FldsBC::AXIS) { AxisFieldsIn(direction, domain, tags); @@ -606,14 +606,13 @@ namespace ntt { } // loop over directions } - void AbsorbFieldsIn(dir::direction_t direction, - domain_t& domain, - BCTags tags) { + void MatchFieldsIn(dir::direction_t direction, + domain_t& domain, + BCTags tags) { /** - * absorbing boundaries + * matching boundaries */ - const auto ds = m_params.template get( - "grid.boundaries.absorb.ds"); + const auto ds = m_params.template get("grid.boundaries.match.ds"); const auto dim = direction.get_dim(); real_t xg_min, xg_max, xg_edge; auto sign = direction.get_sign(); @@ -652,40 +651,49 @@ namespace ntt { range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } - if (dim == in::x1) { - Kokkos::parallel_for( - "AbsorbFields", - CreateRangePolicy(range_min, range_max), - kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else if (dim == in::x2) { - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - Kokkos::parallel_for( - "AbsorbFields", - CreateRangePolicy(range_min, range_max), - 
kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); - } - } else if (dim == in::x3) { - if constexpr (M::Dim == Dim::_3D) { + if constexpr (traits::has_member::value) { + auto match_fields = m_pgen.MatchFields(time); + if (dim == in::x1) { Kokkos::parallel_for( - "AbsorbFields", + "MatchFields", CreateRangePolicy(range_min, range_max), - kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); + kernel::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else if (dim == in::x2) { + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + Kokkos::parallel_for( + "MatchFields", + CreateRangePolicy(range_min, range_max), + kernel::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dim == in::x3) { + if constexpr (M::Dim == Dim::_3D) { + Kokkos::parallel_for( + "MatchFields", + CreateRangePolicy(range_min, range_max), + kernel::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else { + raise::Error("Invalid dimension", HERE); + } } } } @@ -774,40 +782,51 @@ namespace ntt { if (tags & BC::B) { comps.push_back(normal_b_comp); } - if constexpr (traits::has_member::value) { - raise::Error("Field driver for fixed fields not implemented", HERE); - } else { - // if field driver not present, set fields to fixed values + if constexpr (traits::has_member::value) { + raise::Error("Non-const fixed fields not implemented", HERE); + } else if constexpr ( + traits::has_member::value) { for (const auto& comp : comps) { - auto value = ZERO; + auto value = ZERO; + bool shouldset = false; if constexpr ( - traits::has_member::value) { + 
traits::has_member::value) { // if fix field function present, read from it - value = m_pgen.FixField((em)comp); + const auto newset = m_pgen.FixFieldsConst( + (bc_in)(sign * ((short)dim + 1)), + (em)comp); + value = newset.first; + shouldset = newset.second; } - if constexpr (M::Dim == Dim::_1D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - comp), - value); - } else if constexpr (M::Dim == Dim::_2D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - comp), - value); - } else if constexpr (M::Dim == Dim::_3D) { - Kokkos::deep_copy( - Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - std::make_pair(xi_min[2], xi_max[2]), - comp), - value); - } else { - raise::Error("Invalid dimension", HERE); + if (shouldset) { + if constexpr (M::Dim == Dim::_1D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + comp), + value); + } else if constexpr (M::Dim == Dim::_2D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + comp), + value); + } else if constexpr (M::Dim == Dim::_3D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + std::make_pair(xi_min[2], xi_max[2]), + comp), + value); + } else { + raise::Error("Invalid dimension", HERE); + } } } + } else { + raise::Error("Fixed fields not present (both const and non-const)", HERE); } } @@ -817,7 +836,7 @@ namespace ntt { /** * atmosphere field boundaries */ - if constexpr (traits::has_member::value) { + if constexpr (traits::has_member::value) { const auto [sign, dim, xg_min, xg_max] = get_atm_extent(direction); const auto dd = static_cast(dim); boundaries_t box; @@ -846,7 +865,7 @@ namespace ntt { 
range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } - auto field_driver = m_pgen.FieldDriver(time); + auto atm_fields = m_pgen.AtmFields(time); std::size_t il_edge; if (sign > 0) { il_edge = range_min[dd] - N_GHOSTS; @@ -859,9 +878,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -869,9 +888,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -882,9 +901,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -892,9 +911,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -908,9 +927,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -918,9 +937,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -932,8 +951,7 @@ namespace ntt { raise::Error("Invalid dimension", HERE); } } else { - raise::Error("Field driver not implemented in PGEN for atmosphere BCs", - HERE); + raise::Error("Atm fields not implemented in PGEN for atmosphere BCs", HERE); } } diff --git 
a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 0a605651c..4a9b3056a 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -47,7 +47,7 @@ namespace ntt { /* * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Parameters that must not be changed during after the checkpoint restart + * Parameters that must not be changed during the checkpoint restart * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ void SimulationParams::setImmutableParams(const toml::value& toml_data) { @@ -322,7 +322,7 @@ namespace ntt { /* * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Parameters that may be changed during after the checkpoint restart + * Parameters that may be changed during the checkpoint restart * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ void SimulationParams::setMutableParams(const toml::value& toml_data) { @@ -351,9 +351,9 @@ namespace ntt { auto atm_defined = false; for (const auto& bcs : flds_bc) { for (const auto& bc : bcs) { - if (fmt::toLower(bc) == "absorb") { - promiseToDefine("grid.boundaries.absorb.ds"); - promiseToDefine("grid.boundaries.absorb.coeff"); + if (fmt::toLower(bc) == "match") { + promiseToDefine("grid.boundaries.match.ds"); + promiseToDefine("grid.boundaries.match.coeff"); } if (fmt::toLower(bc) == "atmosphere") { raise::ErrorIf(atm_defined, @@ -386,7 +386,6 @@ namespace ntt { for (const auto& bc : bcs) { if (fmt::toLower(bc) == "absorb") { promiseToDefine("grid.boundaries.absorb.ds"); - promiseToDefine("grid.boundaries.absorb.coeff"); } if (fmt::toLower(bc) == "atmosphere") { raise::ErrorIf(atm_defined, @@ -731,6 +730,38 @@ namespace ntt { set("grid.boundaries.fields", flds_bc_pairwise); set("grid.boundaries.particles", prtl_bc_pairwise); + if (isPromised("grid.boundaries.match.ds")) { + if (coord_enum == Coord::Cart) { + auto min_extent = std::numeric_limits::max(); + for (const auto& e : 
extent_pairwise) { + min_extent = std::min(min_extent, e.second - e.first); + } + set("grid.boundaries.match.ds", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "ds", + min_extent * defaults::bc::match::ds_frac)); + } else { + auto r_extent = extent_pairwise[0].second - extent_pairwise[0].first; + set("grid.boundaries.match.ds", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "ds", + r_extent * defaults::bc::match::ds_frac)); + } + set("grid.boundaries.match.coeff", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "coeff", + defaults::bc::match::coeff)); + } + if (isPromised("grid.boundaries.absorb.ds")) { if (coord_enum == Coord::Cart) { auto min_extent = std::numeric_limits::max(); @@ -754,13 +785,6 @@ namespace ntt { "ds", r_extent * defaults::bc::absorb::ds_frac)); } - set("grid.boundaries.absorb.coeff", - toml::find_or(toml_data, - "grid", - "boundaries", - "absorb", - "coeff", - defaults::bc::absorb::coeff)); } if (isPromised("grid.boundaries.atmosphere.temperature")) { diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index 9fd40e201..4cde4fca5 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -10,7 +10,11 @@ * - traits::run_t, traits::to_string_t * - traits::pgen::init_flds_t * - traits::pgen::ext_force_t - * - traits::pgen::field_driver_t + * - traits::pgen::atm_fields_t + * - traits::pgen::match_fields_const_t + * - traits::pgen::match_fields_t + * - traits::pgen::fix_fields_const_t + * - traits::pgen::fix_fields_t * - traits::pgen::init_prtls_t * - traits::pgen::custom_fields_t * - traits::pgen::custom_field_output_t @@ -94,10 +98,19 @@ namespace traits { using ext_force_t = decltype(&T::ext_force); template - using field_driver_t = decltype(&T::FieldDriver); + using atm_fields_t = decltype(&T::AtmFields); template - using fix_field_t = decltype(&T::FixField); + using match_fields_t = decltype(&T::MatchFields); + + template + using match_fields_const_t = 
decltype(&T::MatchFieldsConst); + + template + using fix_fields_t = decltype(&T::FixFields); + + template + using fix_fields_const_t = decltype(&T::FixFieldsConst); template using custom_fields_t = decltype(&T::CustomFields); diff --git a/src/global/defaults.h b/src/global/defaults.h index b7b0107e7..f44fd1844 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -41,9 +41,13 @@ namespace ntt::defaults { } // namespace gr namespace bc { - namespace absorb { + namespace match { const real_t ds_frac = 0.01; const real_t coeff = 1.0; + } // namespace match + + namespace absorb { + const real_t ds_frac = 0.01; } // namespace absorb } // namespace bc diff --git a/src/global/enums.h b/src/global/enums.h index 283cb456d..8f2495c13 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -8,7 +8,7 @@ * - enum ntt::SimEngine // SRPIC, GRPIC * - enum ntt::PrtlBC // periodic, absorb, atmosphere, custom, * reflect, horizon, axis, sync - * - enum ntt::FldsBC // periodic, absorb, fixed, atmosphere, + * - enum ntt::FldsBC // periodic, match, fixed, atmosphere, * custom, horizon, axis, sync * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none @@ -215,7 +215,7 @@ namespace ntt { enum type : uint8_t { INVALID = 0, PERIODIC = 1, - ABSORB = 2, + MATCH = 2, FIXED = 3, ATMOSPHERE = 4, CUSTOM = 5, @@ -226,9 +226,9 @@ namespace ntt { constexpr FldsBC(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { PERIODIC, ABSORB, FIXED, ATMOSPHERE, + static constexpr type variants[] = { PERIODIC, MATCH, FIXED, ATMOSPHERE, CUSTOM, HORIZON, AXIS, SYNC }; - static constexpr const char* lookup[] = { "periodic", "absorb", "fixed", + static constexpr const char* lookup[] = { "periodic", "match", "fixed", "atmosphere", "custom", "horizon", "axis", "sync" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); diff --git a/src/global/global.h b/src/global/global.h index 6669981b6..77fa8c51c 
100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -13,6 +13,10 @@ * - enum PrepareOutput * - enum CellLayer // allLayer, activeLayer, minGhostLayer, * minActiveLayer, maxActiveLayer, maxGhostLayer + * - enum Idx // U, D, T, XYZ, Sph, PU, PD + * - enum Crd // Cd, Ph, XYZ, Sph + * - enum in // x1, x2, x3 + * - enum bc_in // Px1, Mx1, Px2, Mx2, Px3, Mx3 * - type box_region_t * - files::LogFile, files::ErrFile, files::InfoFile * - type prtldx_t @@ -184,6 +188,15 @@ enum class in : unsigned short { x3 = 2, }; +enum class bc_in : short { + Mx1 = -1, + Px1 = 1, + Mx2 = -2, + Px2 = 2, + Mx3 = -3, + Px3 = 3, +}; + template using box_region_t = CellLayer[D]; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 4d678e85e..7785ec1a3 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -61,7 +61,7 @@ auto main() -> int { enum_str_t all_simulation_engines = { "srpic", "grpic" }; enum_str_t all_particle_bcs = { "periodic", "absorb", "atmosphere", "custom", "reflect", "horizon", "axis", "sync" }; - enum_str_t all_fields_bcs = { "periodic", "absorb", "fixed", "atmosphere", + enum_str_t all_fields_bcs = { "periodic", "match", "fixed", "atmosphere", "custom", "horizon", "axis", "sync" }; enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 2f2a458bb..4d52dd207 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -15,61 +15,133 @@ namespace kernel { using namespace ntt; - template - struct AbsorbBoundaries_kernel { + template + struct MatchBoundaries_kernel { static_assert(M::is_metric, "M must be a metric class"); - static_assert(i <= static_cast(M::Dim), + static_assert(static_cast(o) < + static_cast(M::Dim), "Invalid component index"); + static constexpr idx_t i = static_cast(o) + 1u; + static constexpr bool defines_dx1 = traits::has_method::value; + static 
constexpr bool defines_dx2 = traits::has_method::value; + static constexpr bool defines_dx3 = traits::has_method::value; + static constexpr bool defines_ex1 = traits::has_method::value; + static constexpr bool defines_ex2 = traits::has_method::value; + static constexpr bool defines_ex3 = traits::has_method::value; + static constexpr bool defines_bx1 = traits::has_method::value; + static constexpr bool defines_bx2 = traits::has_method::value; + static constexpr bool defines_bx3 = traits::has_method::value; ndfield_t Fld; + const I fset; const M metric; const real_t xg_edge; const real_t dx_abs; const BCTags tags; - AbsorbBoundaries_kernel(ndfield_t Fld, - const M& metric, - real_t xg_edge, - real_t dx_abs, - BCTags tags) + MatchBoundaries_kernel(ndfield_t Fld, + const I& fset, + const M& metric, + real_t xg_edge, + real_t dx_abs, + BCTags tags) : Fld { Fld } + , fset { fset } , metric { metric } , xg_edge { xg_edge } , dx_abs { dx_abs } , tags { tags } {} + Inline auto shape(const real_t& dx) const -> real_t { + return math::tanh(dx * FOUR / dx_abs); + } + Inline void operator()(index_t i1) const { if constexpr (M::Dim == Dim::_1D) { const auto i1_ = COORD(i1); - for (const auto comp : - { em::ex1, em::ex2, em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1 or comp == em::bx2 or comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - } else if (comp == em::ex2 or comp == em::bx1 or comp == em::ex3) { - x_Cd[0] = i1_; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, comp) *= math::tanh(dx 
/ (INV_4 * dx_abs)); + + coord_t x_Ph_0 { ZERO }; + coord_t x_Ph_H { ZERO }; + metric.template convert({ i1_ }, x_Ph_0); + metric.template convert({ i1_ + HALF }, x_Ph_H); + + if constexpr (S == SimEngine::SRPIC) { + // SRPIC + auto ex1_U { ZERO }, ex2_U { ZERO }, ex3_U { ZERO }, bx1_U { ZERO }, + bx2_U { ZERO }, bx3_U { ZERO }; + if (tags & BC::E) { + if constexpr (defines_ex1) { + ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.ex1(x_Ph_H)); + } + if constexpr (defines_ex2) { + ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ }, + fset.ex2(x_Ph_0)); + } + if constexpr (defines_ex3) { + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ }, + fset.ex3(x_Ph_0)); + } + } + if (tags & BC::B) { + if constexpr (defines_bx1) { + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ }, + fset.bx1(x_Ph_0)); + } + if constexpr (defines_bx2) { + bx2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx2(x_Ph_H)); + } + if constexpr (defines_bx3) { + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx3(x_Ph_H)); + } + } + + { + const auto dx = math::abs( + metric.template convert(i1_ + HALF) - xg_edge); + const auto s = shape(dx); + if (tags & BC::E) { + // ex1 + Fld(i1, em::ex1) = s * Fld(i1, em::ex1) + (ONE - s) * ex1_U; + } + if (tags & BC::B) { + // bx2 + Fld(i1, em::bx2) = s * Fld(i1, em::bx2) + (ONE - s) * bx2_U; + // bx3 + Fld(i1, em::bx3) = s * Fld(i1, em::bx3) + (ONE - s) * bx3_U; + } + } + { + const auto dx = math::abs( + metric.template convert(i1_) - xg_edge); + const auto s = shape(dx); + if (tags & BC::B) { + // bx1 + Fld(i1, em::bx1) = s * Fld(i1, em::bx1) + (ONE - s) * bx1_U; + } + if (tags & BC::E) { + // ex2 + Fld(i1, em::ex2) = s * Fld(i1, em::ex2) + (ONE - s) * ex2_U; + // ex3 + Fld(i1, em::ex3) = s * Fld(i1, em::ex3) + (ONE - s) * ex3_U; + } + } + } else { + // GRPIC + raise::KernelError(HERE, "1D GRPIC not implemented"); } } else { 
raise::KernelError( HERE, - "AbsorbFields_kernel: 1D implementation called for D != 1"); + "MatchBoundaries_kernel: 1D implementation called for D != 1"); } } @@ -77,43 +149,130 @@ namespace kernel { if constexpr (M::Dim == Dim::_2D) { const auto i1_ = COORD(i1); const auto i2_ = COORD(i2); - for (const auto comp : - { em::ex1, em::ex2, em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1 or comp == em::bx2) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - } else if (comp == em::ex2 or comp == em::bx1) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - } else if (comp == em::ex3) { - x_Cd[0] = i1_; - x_Cd[1] = i2_; - } else if (comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_ + HALF; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, i2, comp) *= math::tanh(dx / (INV_4 * dx_abs)); + + if constexpr (S == SimEngine::SRPIC) { + // SRPIC + { + coord_t x_Ph_H0 { ZERO }; + metric.template convert({ i1_ + HALF, i2_ }, x_Ph_H0); + // i1 + 1/2, i2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else { + xi_Cd = i2_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + if (tags & BC::E) { + auto ex1_U { ZERO }; + if constexpr (defines_ex1) { + ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.ex1(x_Ph_H0)); + } + // ex1 + Fld(i1, i2, em::ex1) = s * Fld(i1, i2, em::ex1) + (ONE - s) * ex1_U; + } + if (tags & BC::B) { + auto bx2_U { ZERO }; + if constexpr (defines_bx2) { + bx2_U = 
metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.bx2(x_Ph_H0)); + } + // bx2 + Fld(i1, i2, em::bx2) = s * Fld(i1, i2, em::bx2) + (ONE - s) * bx2_U; + } + } + { + coord_t x_Ph_0H { ZERO }; + metric.template convert({ i1_, i2_ + HALF }, x_Ph_0H); + // i1, i2 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else { + xi_Cd = i2_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + if (tags & BC::E) { + auto ex2_U { ZERO }; + if constexpr (defines_ex2) { + ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.ex2(x_Ph_0H)); + } + // ex2 + Fld(i1, i2, em::ex2) = s * Fld(i1, i2, em::ex2) + (ONE - s) * ex2_U; + } + if (tags & BC::B) { + auto bx1_U { ZERO }; + if constexpr (defines_bx1) { + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.bx1(x_Ph_0H)); + } + // bx1 + Fld(i1, i2, em::bx1) = s * Fld(i1, i2, em::bx1) + (ONE - s) * bx1_U; + } + } + if (tags & BC::E) { + auto ex3_U { ZERO }; + if constexpr (defines_ex3) { + coord_t x_Ph_00 { ZERO }; + metric.template convert({ i1_, i2_ }, x_Ph_00); + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_ }, + fset.ex3(x_Ph_00)); + } + // i1, i2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else { + xi_Cd = i2_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + // ex3 + Fld(i1, i2, em::ex3) = s * Fld(i1, i2, em::ex3) + (ONE - s) * ex3_U; + } + if (tags & BC::B) { + auto bx3_U { ZERO }; + if constexpr (defines_bx3) { + coord_t x_Ph_HH { ZERO }; + metric.template convert({ i1_ + HALF, i2_ + HALF }, + x_Ph_HH); + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF }, + fset.bx3(x_Ph_HH)); + } + // i1 + 1/2, i2 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else { + xi_Cd = i2_ + HALF; + } + const auto dx = 
math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + // bx3 + Fld(i1, i2, em::bx3) = s * Fld(i1, i2, em::bx3) + (ONE - s) * bx3_U; + } + } else { + // GRPIC + raise::KernelError(HERE, "GRPIC not implemented"); } } else { raise::KernelError( HERE, - "AbsorbFields_kernel: 2D implementation called for D != 2"); + "MatchBoundaries_kernel: 2D implementation called for D != 2"); } } @@ -122,55 +281,180 @@ namespace kernel { const auto i1_ = COORD(i1); const auto i2_ = COORD(i2); const auto i3_ = COORD(i3); - for (const auto comp : - { em::ex1, em::ex2, em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - x_Cd[2] = i3_; - } else if (comp == em::ex2) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_; - } else if (comp == em::ex3) { - x_Cd[0] = i1_; - x_Cd[1] = i2_; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx1) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx2) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, i2, i3, comp) *= math::tanh(dx / (INV_4 * dx_abs)); + + if constexpr (S == SimEngine::SRPIC) { + // SRPIC + if (tags & BC::E) { + { + // i1 + 1/2, i2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } 
else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex1_U { ZERO }; + if constexpr (defines_ex1) { + coord_t x_Ph_H00 { ZERO }; + metric.template convert({ i1_ + HALF, i2_, i3_ }, + x_Ph_H00); + ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ }, + fset.ex1(x_Ph_H00)); + } + // ex1 + Fld(i1, i2, i3, em::ex1) = s * Fld(i1, i2, i3, em::ex1) + + (ONE - s) * ex1_U; + } + { + // i1, i2 + 1/2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex2_U { ZERO }; + if constexpr (defines_ex2) { + coord_t x_Ph_0H0 { ZERO }; + metric.template convert({ i1_, i2_ + HALF, i3_ }, + x_Ph_0H0); + ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ }, + fset.ex2(x_Ph_0H0)); + } + // ex2 + Fld(i1, i2, i3, em::ex2) = s * Fld(i1, i2, i3, em::ex2) + + (ONE - s) * ex2_U; + } + { + // i1, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex3_U { ZERO }; + if constexpr (defines_ex3) { + coord_t x_Ph_00H { ZERO }; + metric.template convert({ i1_, i2_, i3_ + HALF }, + x_Ph_00H); + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_, i3_ + HALF }, + fset.ex3(x_Ph_00H)); + } + // ex3 + Fld(i1, i2, i3, em::ex3) = s * Fld(i1, i2, i3, em::ex3) + + (ONE - s) * ex3_U; + } + } + if (tags & BC::B) { + { + // i1, i2 + 1/2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + 
metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx1_U { ZERO }; + if constexpr (defines_bx1) { + coord_t x_Ph_0HH { ZERO }; + metric.template convert( + { i1_, i2_ + HALF, i3_ + HALF }, + x_Ph_0HH); + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ + HALF }, + fset.bx1(x_Ph_0HH)); + } + // bx1 + Fld(i1, i2, i3, em::bx1) = s * Fld(i1, i2, i3, em::bx1) + + (ONE - s) * bx1_U; + } + { + // i1 + 1/2, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx2_U { ZERO }; + if constexpr (defines_bx2) { + coord_t x_Ph_H0H { ZERO }; + metric.template convert( + { i1_ + HALF, i2_, i3_ + HALF }, + x_Ph_H0H); + bx2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ + HALF }, + fset.bx2(x_Ph_H0H)); + } + // bx2 + Fld(i1, i2, i3, em::bx2) = s * Fld(i1, i2, i3, em::bx2) + + (ONE - s) * bx2_U; + } + { + // i1 + 1/2, i2 + 1/2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx3_U { ZERO }; + if constexpr (defines_bx3) { + coord_t x_Ph_HH0 { ZERO }; + metric.template convert( + { i1_ + HALF, i2_ + HALF, i3_ }, + x_Ph_HH0); + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF, i3_ }, + fset.bx3(x_Ph_HH0)); + } + // bx3 + Fld(i1, i2, i3, em::bx3) = s * Fld(i1, i2, i3, em::bx3) + + (ONE - s) * bx3_U; + } + } + } else { + // GRPIC + raise::KernelError(HERE, "GRPIC not implemented"); } } else { raise::KernelError( HERE, - "AbsorbFields_kernel: 3D implementation called for D != 3"); + "MatchBoundaries_kernel: 3D implementation called 
for D != 3"); } } }; @@ -226,31 +510,28 @@ namespace kernel { static constexpr bool defines_bx2 = traits::has_method::value; static constexpr bool defines_bx3 = traits::has_method::value; - static_assert(defines_ex1 and defines_ex2 and defines_ex3 and - defines_bx1 and defines_bx2 and defines_bx3, - "not all components of E or B are specified in PGEN"); + static_assert(defines_ex1 or defines_ex2 or defines_ex3 or defines_bx1 or + defines_bx2 or defines_bx3, + "none of the components of E or B are specified in PGEN"); static_assert(M::is_metric, "M must be a metric class"); static_assert(static_cast(O) < static_cast(M::Dim), "Invalid Orientation"); ndfield_t Fld; - const I finit; + const I fset; const M metric; const std::size_t i_edge; - const bool setE, setB; EnforcedBoundaries_kernel(ndfield_t& Fld, - const I& finit, + const I& fset, const M& metric, std::size_t i_edge, BCTags tags) : Fld { Fld } - , finit { finit } + , fset { fset } , metric { metric } - , i_edge { i_edge + N_GHOSTS } - , setE { tags & BC::Ex1 or tags & BC::Ex2 or tags & BC::Ex3 } - , setB { tags & BC::Bx1 or tags & BC::Bx2 or tags & BC::Bx3 } {} + , i_edge { i_edge + N_GHOSTS } {} Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { @@ -259,8 +540,8 @@ namespace kernel { coord_t x_Ph_H { ZERO }; metric.template convert({ i1_ }, x_Ph_0); metric.template convert({ i1_ + HALF }, x_Ph_H); - bool setEx1 = setE, setEx2 = setE, setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential @@ -276,35 +557,47 @@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.ex1(x_Ph_H)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, em::ex1) = 
metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.ex1(x_Ph_H)); + } } - if (setEx2) { - Fld(i1, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ }, - finit.ex2(x_Ph_0)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ }, + fset.ex2(x_Ph_0)); + } } - if (setEx3) { - Fld(i1, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ }, - finit.ex3(x_Ph_0)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ }, + fset.ex3(x_Ph_0)); + } } - if (setBx1) { - Fld(i1, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ }, - finit.bx1(x_Ph_0)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ }, + fset.bx1(x_Ph_0)); + } } - if (setBx2) { - Fld(i1, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.bx2(x_Ph_H)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx2(x_Ph_H)); + } } - if (setBx3) { - Fld(i1, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.bx3(x_Ph_H)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx3(x_Ph_H)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); @@ -324,8 +617,8 @@ namespace kernel { metric.template convert({ i1_ + HALF, i2_ }, x_Ph_H0); metric.template convert({ i1_ + HALF, i2_ + HALF }, x_Ph_HH); - bool setEx1 = setE, setEx2 = setE, setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential @@ -353,35 +646,47 
@@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, i2, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF, i2_ }, - finit.ex1(x_Ph_H0)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, i2, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.ex1(x_Ph_H0)); + } } - if (setEx2) { - Fld(i1, i2, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_, i2_ + HALF }, - finit.ex2(x_Ph_0H)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, i2, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.ex2(x_Ph_0H)); + } } - if (setEx3) { - Fld(i1, i2, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_, i2_ }, - finit.ex3(x_Ph_00)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, i2, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_ }, + fset.ex3(x_Ph_00)); + } } - if (setBx1) { - Fld(i1, i2, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_, i2_ + HALF }, - finit.bx1(x_Ph_0H)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, i2, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.bx1(x_Ph_0H)); + } } - if (setBx2) { - Fld(i1, i2, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF, i2_ }, - finit.bx2(x_Ph_H0)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, i2, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.bx2(x_Ph_H0)); + } } - if (setBx3) { - Fld(i1, i2, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF, i2_ + HALF }, - finit.bx3(x_Ph_HH)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, i2, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF }, + fset.bx3(x_Ph_HH)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); @@ -412,8 +717,8 @@ namespace kernel { x_Ph_H0H); 
metric.template convert({ i1_, i2_ + HALF, i3_ + HALF }, x_Ph_0HH); - bool setEx1 = setE, setEx2 = setE, setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential @@ -453,35 +758,47 @@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, i2, i3, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF, i2_, i3_ }, - finit.ex1(x_Ph_H00)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, i2, i3, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ }, + fset.ex1(x_Ph_H00)); + } } - if (setEx2) { - Fld(i1, i2, i3, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_, i2_ + HALF, i3_ }, - finit.ex2(x_Ph_0H0)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, i2, i3, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ }, + fset.ex2(x_Ph_0H0)); + } } - if (setEx3) { - Fld(i1, i2, i3, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_, i2_, i3_ + HALF }, - finit.ex3(x_Ph_00H)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, i2, i3, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_, i3_ + HALF }, + fset.ex3(x_Ph_00H)); + } } - if (setBx1) { - Fld(i1, i2, i3, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_, i2_ + HALF, i3_ + HALF }, - finit.bx1(x_Ph_0HH)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, i2, i3, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ + HALF }, + fset.bx1(x_Ph_0HH)); + } } - if (setBx2) { - Fld(i1, i2, i3, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF, i2_, i3_ + HALF }, - finit.bx2(x_Ph_H0H)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, i2, i3, 
em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ + HALF }, + fset.bx2(x_Ph_H0H)); + } } - if (setBx3) { - Fld(i1, i2, i3, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF, i2_ + HALF, i3_ }, - finit.bx3(x_Ph_HH0)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, i2, i3, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF, i3_ }, + fset.bx3(x_Ph_HH0)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); From 77e89e54503350720e7e05df91232595a4607058 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 15:18:24 -0500 Subject: [PATCH 108/124] bcs cleared --- src/engines/srpic.hpp | 26 ++- src/kernels/fields_bcs.hpp | 429 ++++++++++++++++++++----------------- 2 files changed, 249 insertions(+), 206 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index c44f7641c..9f5e4551f 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -657,7 +657,7 @@ namespace ntt { Kokkos::parallel_for( "MatchFields", CreateRangePolicy(range_min, range_max), - kernel::MatchBoundaries_kernel( + kernel::bc::MatchBoundaries_kernel( domain.fields.em, match_fields, domain.mesh.metric, @@ -669,7 +669,7 @@ namespace ntt { Kokkos::parallel_for( "MatchFields", CreateRangePolicy(range_min, range_max), - kernel::MatchBoundaries_kernel( + kernel::bc::MatchBoundaries_kernel( domain.fields.em, match_fields, domain.mesh.metric, @@ -684,7 +684,7 @@ namespace ntt { Kokkos::parallel_for( "MatchFields", CreateRangePolicy(range_min, range_max), - kernel::MatchBoundaries_kernel( + kernel::bc::MatchBoundaries_kernel( domain.fields.em, match_fields, domain.mesh.metric, @@ -716,12 +716,16 @@ namespace ntt { Kokkos::parallel_for( "AxisBCFields", domain.mesh.n_all(in::x1), - kernel::AxisBoundaries_kernel(domain.fields.em, i2_min, tags)); + kernel::bc::AxisBoundaries_kernel(domain.fields.em, + i2_min, + tags)); } else { Kokkos::parallel_for( "AxisBCFields", 
domain.mesh.n_all(in::x1), - kernel::AxisBoundaries_kernel(domain.fields.em, i2_max, tags)); + kernel::bc::AxisBoundaries_kernel(domain.fields.em, + i2_max, + tags)); } } @@ -878,7 +882,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, @@ -888,7 +892,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, @@ -901,7 +905,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, @@ -911,7 +915,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, @@ -927,7 +931,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, @@ -937,7 +941,7 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::EnforcedBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, atm_fields, domain.mesh.metric, diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 4d52dd207..6fa5d6d68 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -1,5 +1,12 @@ /** - * @brief: kernels/fields_bcs.hpp + * @file kernels/fields_bcs.hpp + * @brief Kernels used for field boundary conditions + * @implements + * - kernel::bc::MatchBoundaries_kernel<> + * - kernel::bc::AxisBoundaries_kernel<> + * - kernel::bc::EnforcedBoundaries_kernel<> + * @namespaces: + * - kernel::bc:: */ #ifndef KERNELS_FIELDS_BCS_HPP @@ -12,9 
+19,18 @@ #include "utils/error.h" #include "utils/numeric.h" -namespace kernel { +namespace kernel::bc { using namespace ntt; + /* + * @tparam S: Simulation Engine + * @tparam I: Field Setter class + * @tparam M: Metric + * @tparam o: Orientation + * + * @brief Applies matching boundary conditions (with a smooth profile) in a specific direction. + * @note If a component is not specified in the field setter, it is ignored. + */ template struct MatchBoundaries_kernel { static_assert(M::is_metric, "M must be a metric class"); @@ -31,6 +47,12 @@ namespace kernel { static constexpr bool defines_bx1 = traits::has_method::value; static constexpr bool defines_bx2 = traits::has_method::value; static constexpr bool defines_bx3 = traits::has_method::value; + static_assert( + (S == SimEngine::SRPIC and (defines_ex1 or defines_ex2 or defines_ex3 or + defines_bx1 or defines_bx2 or defines_bx3)) or + ((S == SimEngine::GRPIC) and (defines_dx1 or defines_dx2 or defines_dx3 or + defines_bx1 or defines_bx2 or defines_bx3)), + "none of the components of E/D or B are specified in PGEN"); ndfield_t Fld; const I fset; @@ -60,12 +82,12 @@ namespace kernel { if constexpr (M::Dim == Dim::_1D) { const auto i1_ = COORD(i1); - coord_t x_Ph_0 { ZERO }; - coord_t x_Ph_H { ZERO }; - metric.template convert({ i1_ }, x_Ph_0); - metric.template convert({ i1_ + HALF }, x_Ph_H); - if constexpr (S == SimEngine::SRPIC) { + coord_t x_Ph_0 { ZERO }; + coord_t x_Ph_H { ZERO }; + metric.template convert({ i1_ }, x_Ph_0); + metric.template convert({ i1_ + HALF }, x_Ph_H); + // SRPIC auto ex1_U { ZERO }, ex2_U { ZERO }, ex3_U { ZERO }, bx1_U { ZERO }, bx2_U { ZERO }, bx3_U { ZERO }; @@ -104,34 +126,44 @@ namespace kernel { } } - { + if constexpr (defines_ex1 or defines_bx2 or defines_bx3) { const auto dx = math::abs( metric.template convert(i1_ + HALF) - xg_edge); const auto s = shape(dx); - if (tags & BC::E) { - // ex1 - Fld(i1, em::ex1) = s * Fld(i1, em::ex1) + (ONE - s) * ex1_U; + if constexpr 
(defines_ex1) { + if (tags & BC::E) { + Fld(i1, em::ex1) = s * Fld(i1, em::ex1) + (ONE - s) * ex1_U; + } } - if (tags & BC::B) { - // bx2 - Fld(i1, em::bx2) = s * Fld(i1, em::bx2) + (ONE - s) * bx2_U; - // bx3 - Fld(i1, em::bx3) = s * Fld(i1, em::bx3) + (ONE - s) * bx3_U; + if constexpr (defines_bx2 or defines_bx3) { + if (tags & BC::B) { + if constexpr (defines_bx2) { + Fld(i1, em::bx2) = s * Fld(i1, em::bx2) + (ONE - s) * bx2_U; + } + if constexpr (defines_bx3) { + Fld(i1, em::bx3) = s * Fld(i1, em::bx3) + (ONE - s) * bx3_U; + } + } } } - { + if constexpr (defines_bx1 or defines_ex2 or defines_ex3) { const auto dx = math::abs( metric.template convert(i1_) - xg_edge); const auto s = shape(dx); - if (tags & BC::B) { - // bx1 - Fld(i1, em::bx1) = s * Fld(i1, em::bx1) + (ONE - s) * bx1_U; + if constexpr (defines_bx1) { + if (tags & BC::B) { + Fld(i1, em::bx1) = s * Fld(i1, em::bx1) + (ONE - s) * bx1_U; + } } - if (tags & BC::E) { - // ex2 - Fld(i1, em::ex2) = s * Fld(i1, em::ex2) + (ONE - s) * ex2_U; - // ex3 - Fld(i1, em::ex3) = s * Fld(i1, em::ex3) + (ONE - s) * ex3_U; + if constexpr (defines_ex2 or defines_ex3) { + if (tags & BC::E) { + if constexpr (defines_ex2) { + Fld(i1, em::ex2) = s * Fld(i1, em::ex2) + (ONE - s) * ex2_U; + } + if constexpr (defines_ex3) { + Fld(i1, em::ex3) = s * Fld(i1, em::ex3) + (ONE - s) * ex3_U; + } + } } } } else { @@ -152,7 +184,7 @@ namespace kernel { if constexpr (S == SimEngine::SRPIC) { // SRPIC - { + if constexpr (defines_ex1 or defines_bx2) { coord_t x_Ph_H0 { ZERO }; metric.template convert({ i1_ + HALF, i2_ }, x_Ph_H0); // i1 + 1/2, i2 @@ -162,31 +194,30 @@ namespace kernel { } else { xi_Cd = i2_; } + const auto dx = math::abs( metric.template convert(xi_Cd) - xg_edge); const auto s = shape(dx); - if (tags & BC::E) { - auto ex1_U { ZERO }; - if constexpr (defines_ex1) { - ex1_U = metric.template transform<1, Idx::T, Idx::U>( + + if constexpr (defines_ex1) { + if (tags & BC::E) { + const auto ex1_U = metric.template 
transform<1, Idx::T, Idx::U>( { i1_ + HALF, i2_ }, fset.ex1(x_Ph_H0)); + Fld(i1, i2, em::ex1) = s * Fld(i1, i2, em::ex1) + (ONE - s) * ex1_U; } - // ex1 - Fld(i1, i2, em::ex1) = s * Fld(i1, i2, em::ex1) + (ONE - s) * ex1_U; } - if (tags & BC::B) { - auto bx2_U { ZERO }; - if constexpr (defines_bx2) { - bx2_U = metric.template transform<2, Idx::T, Idx::U>( + if constexpr (defines_bx2) { + if (tags & BC::B) { + const auto bx2_U = metric.template transform<2, Idx::T, Idx::U>( { i1_ + HALF, i2_ }, fset.bx2(x_Ph_H0)); + Fld(i1, i2, em::bx2) = s * Fld(i1, i2, em::bx2) + (ONE - s) * bx2_U; } - // bx2 - Fld(i1, i2, em::bx2) = s * Fld(i1, i2, em::bx2) + (ONE - s) * bx2_U; } } - { + + if constexpr (defines_ex2 or defines_bx1) { coord_t x_Ph_0H { ZERO }; metric.template convert({ i1_, i2_ + HALF }, x_Ph_0H); // i1, i2 + 1/2 @@ -196,74 +227,74 @@ namespace kernel { } else { xi_Cd = i2_ + HALF; } + const auto dx = math::abs( metric.template convert(xi_Cd) - xg_edge); const auto s = shape(dx); - if (tags & BC::E) { - auto ex2_U { ZERO }; - if constexpr (defines_ex2) { + if constexpr (defines_ex2) { + if (tags & BC::E) { + auto ex2_U { ZERO }; ex2_U = metric.template transform<2, Idx::T, Idx::U>( { i1_, i2_ + HALF }, fset.ex2(x_Ph_0H)); + Fld(i1, i2, em::ex2) = s * Fld(i1, i2, em::ex2) + (ONE - s) * ex2_U; } - // ex2 - Fld(i1, i2, em::ex2) = s * Fld(i1, i2, em::ex2) + (ONE - s) * ex2_U; } - if (tags & BC::B) { - auto bx1_U { ZERO }; - if constexpr (defines_bx1) { + if constexpr (defines_bx1) { + if (tags & BC::B) { + auto bx1_U { ZERO }; bx1_U = metric.template transform<1, Idx::T, Idx::U>( { i1_, i2_ + HALF }, fset.bx1(x_Ph_0H)); + Fld(i1, i2, em::bx1) = s * Fld(i1, i2, em::bx1) + (ONE - s) * bx1_U; } - // bx1 - Fld(i1, i2, em::bx1) = s * Fld(i1, i2, em::bx1) + (ONE - s) * bx1_U; } } - if (tags & BC::E) { - auto ex3_U { ZERO }; - if constexpr (defines_ex3) { + + if constexpr (defines_ex3) { + if (tags & BC::E) { + auto ex3_U { ZERO }; coord_t x_Ph_00 { ZERO }; metric.template 
convert({ i1_, i2_ }, x_Ph_00); ex3_U = metric.template transform<3, Idx::T, Idx::U>( { i1_, i2_ }, fset.ex3(x_Ph_00)); + // i1, i2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else { + xi_Cd = i2_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + Fld(i1, i2, em::ex3) = s * Fld(i1, i2, em::ex3) + (ONE - s) * ex3_U; } - // i1, i2 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_; - } else { - xi_Cd = i2_; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - // ex3 - Fld(i1, i2, em::ex3) = s * Fld(i1, i2, em::ex3) + (ONE - s) * ex3_U; } - if (tags & BC::B) { - auto bx3_U { ZERO }; - if constexpr (defines_bx3) { + + if constexpr (defines_bx3) { + if (tags & BC::B) { + auto bx3_U { ZERO }; coord_t x_Ph_HH { ZERO }; metric.template convert({ i1_ + HALF, i2_ + HALF }, x_Ph_HH); bx3_U = metric.template transform<3, Idx::T, Idx::U>( { i1_ + HALF, i2_ + HALF }, fset.bx3(x_Ph_HH)); + // i1 + 1/2, i2 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else { + xi_Cd = i2_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + // bx3 + Fld(i1, i2, em::bx3) = s * Fld(i1, i2, em::bx3) + (ONE - s) * bx3_U; } - // i1 + 1/2, i2 + 1/2 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_ + HALF; - } else { - xi_Cd = i2_ + HALF; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - // bx3 - Fld(i1, i2, em::bx3) = s * Fld(i1, i2, em::bx3) + (ONE - s) * bx3_U; } } else { // GRPIC @@ -284,129 +315,126 @@ namespace kernel { if constexpr (S == SimEngine::SRPIC) { // SRPIC - if (tags & BC::E) { - { - // i1 + 1/2, i2, i3 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_ + HALF; - } else if constexpr (o == in::x2) { - xi_Cd = i2_; - } else { - xi_Cd = i3_; - } - const auto dx = math::abs( - 
metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto ex1_U { ZERO }; + if constexpr (defines_ex1 or defines_ex2 or defines_ex3) { + if (tags & BC::E) { if constexpr (defines_ex1) { + // i1 + 1/2, i2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex1_U { ZERO }; coord_t x_Ph_H00 { ZERO }; metric.template convert({ i1_ + HALF, i2_, i3_ }, x_Ph_H00); ex1_U = metric.template transform<1, Idx::T, Idx::U>( { i1_ + HALF, i2_, i3_ }, fset.ex1(x_Ph_H00)); + Fld(i1, i2, i3, em::ex1) = s * Fld(i1, i2, i3, em::ex1) + + (ONE - s) * ex1_U; } - // ex1 - Fld(i1, i2, i3, em::ex1) = s * Fld(i1, i2, i3, em::ex1) + - (ONE - s) * ex1_U; - } - { - // i1, i2 + 1/2, i3 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_; - } else if constexpr (o == in::x2) { - xi_Cd = i2_ + HALF; - } else { - xi_Cd = i3_; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto ex2_U { ZERO }; + if constexpr (defines_ex2) { + // i1, i2 + 1/2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex2_U { ZERO }; coord_t x_Ph_0H0 { ZERO }; metric.template convert({ i1_, i2_ + HALF, i3_ }, x_Ph_0H0); ex2_U = metric.template transform<2, Idx::T, Idx::U>( { i1_, i2_ + HALF, i3_ }, fset.ex2(x_Ph_0H0)); + Fld(i1, i2, i3, em::ex2) = s * Fld(i1, i2, i3, em::ex2) + + (ONE - s) * ex2_U; } - // ex2 - Fld(i1, i2, i3, em::ex2) = s * Fld(i1, i2, i3, em::ex2) + - (ONE - s) * ex2_U; - } - { - // i1, i2, i3 + 1/2 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_; - } else if constexpr (o == in::x2) { - 
xi_Cd = i2_; - } else { - xi_Cd = i3_ + HALF; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto ex3_U { ZERO }; + if constexpr (defines_ex3) { + // i1, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex3_U { ZERO }; coord_t x_Ph_00H { ZERO }; metric.template convert({ i1_, i2_, i3_ + HALF }, x_Ph_00H); ex3_U = metric.template transform<3, Idx::T, Idx::U>( { i1_, i2_, i3_ + HALF }, fset.ex3(x_Ph_00H)); + Fld(i1, i2, i3, em::ex3) = s * Fld(i1, i2, i3, em::ex3) + + (ONE - s) * ex3_U; } - // ex3 - Fld(i1, i2, i3, em::ex3) = s * Fld(i1, i2, i3, em::ex3) + - (ONE - s) * ex3_U; } } - if (tags & BC::B) { - { - // i1, i2 + 1/2, i3 + 1/2 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_; - } else if constexpr (o == in::x2) { - xi_Cd = i2_ + HALF; - } else { - xi_Cd = i3_ + HALF; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto bx1_U { ZERO }; + + if constexpr (defines_bx1 or defines_bx2 or defines_bx3) { + if (tags & BC::B) { if constexpr (defines_bx1) { - coord_t x_Ph_0HH { ZERO }; - metric.template convert( - { i1_, i2_ + HALF, i3_ + HALF }, - x_Ph_0HH); - bx1_U = metric.template transform<1, Idx::T, Idx::U>( - { i1_, i2_ + HALF, i3_ + HALF }, - fset.bx1(x_Ph_0HH)); + // i1, i2 + 1/2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx1_U { ZERO }; + if constexpr (defines_bx1) { + coord_t x_Ph_0HH { ZERO }; + metric.template convert( + { i1_, i2_ + HALF, i3_ + HALF }, + x_Ph_0HH); + bx1_U = 
metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ + HALF }, + fset.bx1(x_Ph_0HH)); + } + // bx1 + Fld(i1, i2, i3, em::bx1) = s * Fld(i1, i2, i3, em::bx1) + + (ONE - s) * bx1_U; } - // bx1 - Fld(i1, i2, i3, em::bx1) = s * Fld(i1, i2, i3, em::bx1) + - (ONE - s) * bx1_U; - } - { - // i1 + 1/2, i2, i3 + 1/2 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_ + HALF; - } else if constexpr (o == in::x2) { - xi_Cd = i2_; - } else { - xi_Cd = i3_ + HALF; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto bx2_U { ZERO }; + if constexpr (defines_bx2) { + // i1 + 1/2, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx2_U { ZERO }; coord_t x_Ph_H0H { ZERO }; metric.template convert( { i1_ + HALF, i2_, i3_ + HALF }, @@ -414,26 +442,24 @@ namespace kernel { bx2_U = metric.template transform<2, Idx::T, Idx::U>( { i1_ + HALF, i2_, i3_ + HALF }, fset.bx2(x_Ph_H0H)); + Fld(i1, i2, i3, em::bx2) = s * Fld(i1, i2, i3, em::bx2) + + (ONE - s) * bx2_U; } - // bx2 - Fld(i1, i2, i3, em::bx2) = s * Fld(i1, i2, i3, em::bx2) + - (ONE - s) * bx2_U; - } - { - // i1 + 1/2, i2 + 1/2, i3 - real_t xi_Cd; - if constexpr (o == in::x1) { - xi_Cd = i1_ + HALF; - } else if constexpr (o == in::x2) { - xi_Cd = i2_ + HALF; - } else { - xi_Cd = i3_; - } - const auto dx = math::abs( - metric.template convert(xi_Cd) - xg_edge); - const auto s = shape(dx); - auto bx3_U { ZERO }; + if constexpr (defines_bx3) { + // i1 + 1/2, i2 + 1/2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); 
+ auto bx3_U { ZERO }; coord_t x_Ph_HH0 { ZERO }; metric.template convert( { i1_ + HALF, i2_ + HALF, i3_ }, @@ -441,10 +467,9 @@ namespace kernel { bx3_U = metric.template transform<3, Idx::T, Idx::U>( { i1_ + HALF, i2_ + HALF, i3_ }, fset.bx3(x_Ph_HH0)); + Fld(i1, i2, i3, em::bx3) = s * Fld(i1, i2, i3, em::bx3) + + (ONE - s) * bx3_U; } - // bx3 - Fld(i1, i2, i3, em::bx3) = s * Fld(i1, i2, i3, em::bx3) + - (ONE - s) * bx3_U; } } } else { @@ -459,6 +484,12 @@ namespace kernel { } }; + /* + * @tparam D: Dimension + * @tparam P: Positive/Negative direction + * + * @brief Applies boundary conditions near the polar axis + */ template struct AxisBoundaries_kernel { ndfield_t Fld; @@ -500,6 +531,14 @@ namespace kernel { } }; + /* + * @tparam I: Field Setter class + * @tparam M: Metric + * @tparam P: Positive/Negative direction + * @tparam O: Orientation + * + * @brief Applies enforced boundary conditions (fixed value) + */ template struct EnforcedBoundaries_kernel { static constexpr Dimension D = M::Dim; @@ -806,6 +845,6 @@ namespace kernel { } }; -} // namespace kernel +} // namespace kernel::bc #endif // KERNELS_FIELDS_BCS_HPP From c7c96965947d06d4415328b4b4d7b39d044664e5 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 16:00:40 -0500 Subject: [PATCH 109/124] old tests passing --- src/framework/tests/parameters.cpp | 38 +++++++++++++++--------------- src/output/tests/writer-nompi.cpp | 10 ++++---- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 1a4228642..7cd5ce46a 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -32,7 +32,7 @@ const auto mink_1d = u8R"( fields = [["PERIODIC"]] particles = [["ABSORB", "ABSORB"]] - [grid.boundaries.absorb] + [grid.boundaries.match] coeff = 10.0 ds = 0.025 @@ -101,10 +101,10 @@ const auto sph_2d = u8R"( metric = "spherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = 
[["ATMOSPHERE", "MATCH"]] particles = [["ATMOSPHERE", "ABSORB"]] - [grid.boundaries.absorb] + [grid.boundaries.match] coeff = 10.0 [grid.boundaries.atmosphere] @@ -180,7 +180,7 @@ const auto qks_2d = u8R"( ks_a = 0.99 [grid.boundaries] - fields = [["ABSORB"]] + fields = [["MATCH"]] particles = [["ABSORB"]] [scales] @@ -345,8 +345,8 @@ auto main(int argc, char* argv[]) -> int { "simulation.engine"); boundaries_t fbc = { - { FldsBC::ATMOSPHERE, FldsBC::ABSORB }, - { FldsBC::AXIS, FldsBC::AXIS } + { FldsBC::ATMOSPHERE, FldsBC::MATCH }, + { FldsBC::AXIS, FldsBC::AXIS } }; assert_equal(params_sph_2d.get("scales.B0"), @@ -381,16 +381,16 @@ auto main(int argc, char* argv[]) -> int { fbc.size(), "grid.boundaries.fields.size()"); - // absorb coeffs + // match coeffs assert_equal( - params_sph_2d.get("grid.boundaries.absorb.ds"), - (real_t)(defaults::bc::absorb::ds_frac * 19.0), - "grid.boundaries.absorb.ds"); + params_sph_2d.get("grid.boundaries.match.ds"), + (real_t)(defaults::bc::match::ds_frac * 19.0), + "grid.boundaries.match.ds"); assert_equal( - params_sph_2d.get("grid.boundaries.absorb.coeff"), + params_sph_2d.get("grid.boundaries.match.coeff"), (real_t)10.0, - "grid.boundaries.absorb.coeff"); + "grid.boundaries.match.coeff"); assert_equal(params_sph_2d.get("particles.use_weights"), true, @@ -537,16 +537,16 @@ auto main(int argc, char* argv[]) -> int { pbc.size(), "grid.boundaries.particles.size()"); - // absorb coeffs + // match coeffs assert_equal( - params_qks_2d.get("grid.boundaries.absorb.ds"), - (real_t)(defaults::bc::absorb::ds_frac * (100.0 - 0.8)), - "grid.boundaries.absorb.ds"); + params_qks_2d.get("grid.boundaries.match.ds"), + (real_t)(defaults::bc::match::ds_frac * (100.0 - 0.8)), + "grid.boundaries.match.ds"); assert_equal( - params_qks_2d.get("grid.boundaries.absorb.coeff"), - defaults::bc::absorb::coeff, - "grid.boundaries.absorb.coeff"); + params_qks_2d.get("grid.boundaries.match.coeff"), + defaults::bc::match::coeff, + 
"grid.boundaries.match.coeff"); const auto species = params_qks_2d.get>( "particles.species"); diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 08200d804..8fb2ac026 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -70,7 +70,7 @@ auto main(int argc, char* argv[]) -> int { { // write auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test"); + writer.init(&adios, "hdf5", "test", false); writer.defineMeshLayout({ nx1, nx2, nx3 }, { 0, 0, 0 }, { nx1, nx2, nx3 }, @@ -84,13 +84,13 @@ auto main(int argc, char* argv[]) -> int { field_names.push_back(writer.fieldWriters()[0].name(i)); addresses.push_back(i); } - writer.beginWriting(10, 123.0); + writer.beginWriting(WriteMode::Fields, 10, 123.0); writer.writeField(field_names, field, addresses); - writer.endWriting(); + writer.endWriting(WriteMode::Fields); - writer.beginWriting(20, 123.4); + writer.beginWriting(WriteMode::Fields, 20, 123.4); writer.writeField(field_names, field, addresses); - writer.endWriting(); + writer.endWriting(WriteMode::Fields); } adios.FlushAll(); From f61d1b827f18df7cb31d8ed4230ef71625055206 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 18:22:37 -0500 Subject: [PATCH 110/124] match field test --- src/kernels/fields_bcs.hpp | 1 + src/kernels/tests/CMakeLists.txt | 1 + src/kernels/tests/flds_bc.cpp | 210 +++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 src/kernels/tests/flds_bc.cpp diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 6fa5d6d68..363ff3ad2 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -30,6 +30,7 @@ namespace kernel::bc { * * @brief Applies matching boundary conditions (with a smooth profile) in a specific direction. * @note If a component is not specified in the field setter, it is ignored. 
+ * @note It is supposed to only be called on the active side of the absorbing edge (so sign is not needed). */ template struct MatchBoundaries_kernel { diff --git a/src/kernels/tests/CMakeLists.txt b/src/kernels/tests/CMakeLists.txt index 10e8bb944..a41ea43ef 100644 --- a/src/kernels/tests/CMakeLists.txt +++ b/src/kernels/tests/CMakeLists.txt @@ -31,3 +31,4 @@ gen_test(fields_to_phys) gen_test(prtls_to_phys) gen_test(gca_pusher) gen_test(prtl_bc) +gen_test(flds_bc) diff --git a/src/kernels/tests/flds_bc.cpp b/src/kernels/tests/flds_bc.cpp new file mode 100644 index 000000000..aba829e8b --- /dev/null +++ b/src/kernels/tests/flds_bc.cpp @@ -0,0 +1,210 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/comparators.h" +#include "utils/error.h" + +#include "metrics/minkowski.h" + +#include "kernels/fields_bcs.hpp" + +#include + +#include +#include +#include + +using namespace ntt; +using namespace kernel::bc; +using namespace metric; + +void errorIf(bool condition, const std::string& message) { + if (condition) { + throw std::runtime_error(message); + } +} + +template +struct DummyFieldsBCs { + DummyFieldsBCs() {} + + Inline auto ex1(const coord_t&) const -> real_t { + return TWO; + } + + Inline auto ex2(const coord_t&) const -> real_t { + return THREE; + } + + Inline auto bx2(const coord_t&) const -> real_t { + return FOUR; + } + + Inline auto bx3(const coord_t&) const -> real_t { + return FIVE; + } +}; + +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { + if (not(math::abs(a - b) < acc)) { + printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); + return false; + } + return true; +} + +template +void testFldsBCs(const std::vector& res) { + errorIf(res.size() != (unsigned short)D, "res.size() != D"); + boundaries_t sx; + for (const auto& r : res) { + sx.emplace_back(ZERO, r); + } + const auto metric = Minkowski { res, sx }; + auto fset = DummyFieldsBCs {}; + ndfield_t flds; + if 
constexpr (D == Dim::_1D) { + flds = ndfield_t { "flds", res[0] + 2 * N_GHOSTS }; + } else if constexpr (D == Dim::_2D) { + flds = ndfield_t { "flds", res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS }; + } else if constexpr (D == Dim::_3D) { + flds = ndfield_t { "flds", + res[0] + 2 * N_GHOSTS, + res[1] + 2 * N_GHOSTS, + res[2] + 2 * N_GHOSTS }; + } + + range_t range; + + if constexpr (D == Dim::_1D) { + range = CreateRangePolicy({ res[0] / 2 + N_GHOSTS }, + { res[0] + 2 * N_GHOSTS }); + } else if constexpr (D == Dim::_2D) { + range = CreateRangePolicy({ res[0] / 2 + N_GHOSTS, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + N_GHOSTS }); + } else if constexpr (D == Dim::_3D) { + range = CreateRangePolicy( + { res[0] / 2 + N_GHOSTS, 0, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + N_GHOSTS, res[2] + N_GHOSTS }); + } + + const auto xg_edge = (real_t)(sx[0].second); + const auto dx_abs = (real_t)(res[0] / 10.0); + + Kokkos::parallel_for( + "MatchBoundaries_kernel", + range, + MatchBoundaries_kernel( + flds, + fset, + metric, + xg_edge, + dx_abs, + BC::E | BC::B)); + + if constexpr (D == Dim::_1D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy({ N_GHOSTS }, { res[0] + N_GHOSTS }), + Lambda(index_t i1) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::ex1), TWO * (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, em::ex2), THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, em::bx2), FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + 
} + if (not cmp::AlmostEqual(flds(i1, em::bx3), FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } else if constexpr (D == Dim::_2D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy({ N_GHOSTS, N_GHOSTS }, + { res[0] + N_GHOSTS, res[1] + N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, i2, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::ex1), TWO * (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::ex2), THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, i2, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::bx2), FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::bx3), FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } else if constexpr (D == Dim::_3D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy( + { N_GHOSTS, N_GHOSTS, N_GHOSTS }, + { res[0] + N_GHOSTS, res[1] + N_GHOSTS, res[2] + N_GHOSTS }), + Lambda(index_t i1, index_t i2, index_t i3) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, 
em::ex1), TWO * (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::ex2), + THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, i2, i3, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::bx2), + FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::bx3), + FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } +} + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + + try { + using namespace ntt; + + testFldsBCs({ 24 }); + testFldsBCs({ 64, 32 }); + testFldsBCs({ 14, 22, 15 }); + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + Kokkos::finalize(); + return 1; + } + Kokkos::finalize(); + return 0; +} From f81a9a5e2928a0037759302fe6a3fb366db8a745 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 30 Jan 2025 14:44:22 -0500 Subject: [PATCH 111/124] minor --- dev/nix/shell.nix | 3 --- input.example.toml | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 13509dac2..1f21e82b0 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -26,10 +26,7 @@ pkgs.mkShell { python312Packages.jupyter cmake-format -<<<<<<< HEAD cmake-lint -======= ->>>>>>> 8b0f205a866f7f7534d8190e3ebee580ea09f7d8 neocmakelsp black pyright diff --git a/input.example.toml b/input.example.toml index 3f367995a..788c30685 100644 --- a/input.example.toml +++ b/input.example.toml @@ -93,8 +93,8 @@ # @valid: "PERIODIC", "MATCH", "FIXED", "ATMOSPHERE", "CUSTOM", "HORIZON" # @example: [["CUSTOM", "MATCH"]] (for 2D spherical [[rmin, rmax]]) # @note: When periodic in any of the directions, you should only 
set one value: [..., ["PERIODIC"], ...] - # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "match"]] - # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["match"]] + # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "MATCH"]] + # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["MATCH"]] fields = "" # Boundary conditions for fields: # @required From 9ce606cd91f66d7111570756e183cb463f59b885 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 30 Jan 2025 16:07:12 -0500 Subject: [PATCH 112/124] changed bc in setups --- .../_monopole}/monopole.toml | 0 .../monopole => legacy/_monopole}/pgen.hpp | 2 +- setups/srpic/magnetar/pgen.hpp | 321 +++++++++--------- setups/srpic/magnetosphere/pgen.hpp | 6 +- 4 files changed, 167 insertions(+), 162 deletions(-) rename {setups/srpic/monopole => legacy/_monopole}/monopole.toml (100%) rename {setups/srpic/monopole => legacy/_monopole}/pgen.hpp (97%) diff --git a/setups/srpic/monopole/monopole.toml b/legacy/_monopole/monopole.toml similarity index 100% rename from setups/srpic/monopole/monopole.toml rename to legacy/_monopole/monopole.toml diff --git a/setups/srpic/monopole/pgen.hpp b/legacy/_monopole/pgen.hpp similarity index 97% rename from setups/srpic/monopole/pgen.hpp rename to legacy/_monopole/pgen.hpp index 389a6c6f7..ed8877b71 100644 --- a/setups/srpic/monopole/pgen.hpp +++ b/legacy/_monopole/pgen.hpp @@ -86,7 +86,7 @@ namespace user { inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { return DriveFields { time, Bsurf, Rstar, Omega }; } }; diff --git a/setups/srpic/magnetar/pgen.hpp b/setups/srpic/magnetar/pgen.hpp index cacbb7c9a..10f98ea5d 100644 --- a/setups/srpic/magnetar/pgen.hpp +++ b/setups/srpic/magnetar/pgen.hpp @@ -85,7 +85,7 @@ namespace user 
{ const real_t Bsurf, Rstar, Omega, gamma_pairs, pp_thres; InitFields init_flds; - + inline PGen(const SimulationParams& p, const Metadomain& m) : arch::ProblemGenerator(p) , global_domain { m } @@ -94,12 +94,11 @@ namespace user { , Omega { p.template get("setup.omega") } , pp_thres { p.template get("setup.pp_thres") } , gamma_pairs { p.template get("setup.gamma_pairs") } - , init_flds { Bsurf, Rstar } { - } + , init_flds { Bsurf, Rstar } {} inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { const real_t omega_t = Omega * ((ONE - math::tanh((static_cast(5.0) - time) * HALF)) * @@ -109,170 +108,172 @@ namespace user { return DriveFields { time, Bsurf, Rstar, omega_t }; } - void CustomPostStep(std::size_t , long double, Domain& domain) { - - // Ad-hoc PP kernel - { - - auto& species2_e = domain.species[2]; - auto& species2_p = domain.species[3]; - auto& species3_e = domain.species[4]; - auto& species3_p = domain.species[5]; - auto metric = domain.mesh.metric; - auto pp_thres_ = this->pp_thres; - auto gamma_pairs_ = this->gamma_pairs; - - for (std::size_t s { 0 }; s < 6; ++s) { - if (s == 1) { - continue; - } - - array_t elec_ind("elec_ind"); - array_t pos_ind("pos_ind"); - - auto offset_e = species3_e.npart(); - auto offset_p = species3_p.npart(); - - auto ux1_e = species3_e.ux1; - auto ux2_e = species3_e.ux2; - auto ux3_e = species3_e.ux3; - auto i1_e = species3_e.i1; - auto i2_e = species3_e.i2; - auto dx1_e = species3_e.dx1; - auto dx2_e = species3_e.dx2; - auto phi_e = species3_e.phi; - auto weight_e = species3_e.weight; - auto tag_e = species3_e.tag; - - auto ux1_p = species3_p.ux1; - auto ux2_p = species3_p.ux2; - auto ux3_p = species3_p.ux3; - auto i1_p = species3_p.i1; - auto i2_p = species3_p.i2; - auto dx1_p = species3_p.dx1; - auto dx2_p = species3_p.dx2; - auto phi_p = species3_p.phi; - auto weight_p = species3_p.weight; - auto tag_p = species3_p.tag; - - if (s == 0) { - - offset_e 
= species2_e.npart(); - offset_p = species2_p.npart(); - - ux1_e = species2_e.ux1; - ux2_e = species2_e.ux2; - ux3_e = species2_e.ux3; - i1_e = species2_e.i1; - i2_e = species2_e.i2; - dx1_e = species2_e.dx1; - dx2_e = species2_e.dx2; - phi_e = species2_e.phi; - weight_e = species2_e.weight; - tag_e = species2_e.tag; - - ux1_p = species2_p.ux1; - ux2_p = species2_p.ux2; - ux3_p = species2_p.ux3; - i1_p = species2_p.i1; - i2_p = species2_p.i2; - dx1_p = species2_p.dx1; - dx2_p = species2_p.dx2; - phi_p = species2_p.phi; - weight_p = species2_p.weight; - tag_p = species2_p.tag; - - } - - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto weight = species.weight; - auto tag = species.tag; - - Kokkos::parallel_for( - "InjectPairs", species.rangeActiveParticles(), Lambda(index_t p) { - if (tag(p) == ParticleTag::dead) { - return; - } + auto MatchFields(real_t) const -> InitFields { + return InitFields { Bsurf, Rstar }; + } - auto px = ux1(p); - auto py = ux2(p); - auto pz = ux3(p); - auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); - - const coord_t xCd{ - static_cast(i1(p)) + dx1(p), - static_cast(i2(p)) + dx2(p)}; - - coord_t xPh { ZERO }; - metric.template convert(xCd, xPh); - - if ((gamma > pp_thres_) && (math::sin(xPh[1]) > 0.1)) { - - auto new_gamma = gamma - 2.0 * gamma_pairs_; - auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - auto pair_fac = math::sqrt(SQR(gamma_pairs_) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - - auto elec_p = Kokkos::atomic_fetch_add(&elec_ind(), 1); - auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); - - i1_e(elec_p + offset_e) = i1(p); - dx1_e(elec_p + offset_e) = dx1(p); - i2_e(elec_p + offset_e) = i2(p); - dx2_e(elec_p + offset_e) = dx2(p); - phi_e(elec_p + offset_e) = phi(p); - ux1_e(elec_p + 
offset_e) = px * pair_fac; - ux2_e(elec_p + offset_e) = py * pair_fac; - ux3_e(elec_p + offset_e) = pz * pair_fac; - weight_e(elec_p + offset_e) = weight(p); - tag_e(elec_p + offset_e) = ParticleTag::alive; - - i1_p(pos_p + offset_p) = i1(p); - dx1_p(pos_p + offset_p) = dx1(p); - i2_p(pos_p + offset_p) = i2(p); - dx2_p(pos_p + offset_p) = dx2(p); - phi_p(pos_p + offset_p) = phi(p); - ux1_p(pos_p + offset_p) = px * pair_fac; - ux2_p(pos_p + offset_p) = py * pair_fac; - ux3_p(pos_p + offset_p) = pz * pair_fac; - weight_p(pos_p + offset_p) = weight(p); - tag_p(pos_p + offset_p) = ParticleTag::alive; - - ux1(p) *= new_fac; - ux2(p) *= new_fac; - ux3(p) *= new_fac; - } + void CustomPostStep(std::size_t, long double, Domain& domain) { - }); + // Ad-hoc PP kernel + { - auto elec_ind_h = Kokkos::create_mirror(elec_ind); - Kokkos::deep_copy(elec_ind_h, elec_ind); - if (s == 0) { - species2_e.set_npart(offset_e + elec_ind_h()); - } else { - species3_e.set_npart(offset_e + elec_ind_h()); - } + auto& species2_e = domain.species[2]; + auto& species2_p = domain.species[3]; + auto& species3_e = domain.species[4]; + auto& species3_p = domain.species[5]; + auto metric = domain.mesh.metric; + auto pp_thres_ = this->pp_thres; + auto gamma_pairs_ = this->gamma_pairs; - auto pos_ind_h = Kokkos::create_mirror(pos_ind); - Kokkos::deep_copy(pos_ind_h, pos_ind); - if (s == 0) { - species2_p.set_npart(offset_p + pos_ind_h()); - } else { - species3_p.set_npart(offset_p + pos_ind_h()); - } + for (std::size_t s { 0 }; s < 6; ++s) { + if (s == 1) { + continue; + } + array_t elec_ind("elec_ind"); + array_t pos_ind("pos_ind"); + + auto offset_e = species3_e.npart(); + auto offset_p = species3_p.npart(); + + auto ux1_e = species3_e.ux1; + auto ux2_e = species3_e.ux2; + auto ux3_e = species3_e.ux3; + auto i1_e = species3_e.i1; + auto i2_e = species3_e.i2; + auto dx1_e = species3_e.dx1; + auto dx2_e = species3_e.dx2; + auto phi_e = species3_e.phi; + auto weight_e = species3_e.weight; + auto tag_e = 
species3_e.tag; + + auto ux1_p = species3_p.ux1; + auto ux2_p = species3_p.ux2; + auto ux3_p = species3_p.ux3; + auto i1_p = species3_p.i1; + auto i2_p = species3_p.i2; + auto dx1_p = species3_p.dx1; + auto dx2_p = species3_p.dx2; + auto phi_p = species3_p.phi; + auto weight_p = species3_p.weight; + auto tag_p = species3_p.tag; + + if (s == 0) { + + offset_e = species2_e.npart(); + offset_p = species2_p.npart(); + + ux1_e = species2_e.ux1; + ux2_e = species2_e.ux2; + ux3_e = species2_e.ux3; + i1_e = species2_e.i1; + i2_e = species2_e.i2; + dx1_e = species2_e.dx1; + dx2_e = species2_e.dx2; + phi_e = species2_e.phi; + weight_e = species2_e.weight; + tag_e = species2_e.tag; + + ux1_p = species2_p.ux1; + ux2_p = species2_p.ux2; + ux3_p = species2_p.ux3; + i1_p = species2_p.i1; + i2_p = species2_p.i2; + dx1_p = species2_p.dx1; + dx2_p = species2_p.dx2; + phi_p = species2_p.phi; + weight_p = species2_p.weight; + tag_p = species2_p.tag; + } + + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto weight = species.weight; + auto tag = species.tag; + + Kokkos::parallel_for( + "InjectPairs", + species.rangeActiveParticles(), + Lambda(index_t p) { + if (tag(p) == ParticleTag::dead) { + return; + } + + auto px = ux1(p); + auto py = ux2(p); + auto pz = ux3(p); + auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); + + const coord_t xCd { static_cast(i1(p)) + dx1(p), + static_cast(i2(p)) + dx2(p) }; + + coord_t xPh { ZERO }; + metric.template convert(xCd, xPh); + + if ((gamma > pp_thres_) && (math::sin(xPh[1]) > 0.1)) { + + auto new_gamma = gamma - 2.0 * gamma_pairs_; + auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / + math::sqrt(SQR(gamma) - 1.0); + auto pair_fac = math::sqrt(SQR(gamma_pairs_) - 1.0) / + math::sqrt(SQR(gamma) - 1.0); + + auto elec_p = 
Kokkos::atomic_fetch_add(&elec_ind(), 1); + auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); + + i1_e(elec_p + offset_e) = i1(p); + dx1_e(elec_p + offset_e) = dx1(p); + i2_e(elec_p + offset_e) = i2(p); + dx2_e(elec_p + offset_e) = dx2(p); + phi_e(elec_p + offset_e) = phi(p); + ux1_e(elec_p + offset_e) = px * pair_fac; + ux2_e(elec_p + offset_e) = py * pair_fac; + ux3_e(elec_p + offset_e) = pz * pair_fac; + weight_e(elec_p + offset_e) = weight(p); + tag_e(elec_p + offset_e) = ParticleTag::alive; + + i1_p(pos_p + offset_p) = i1(p); + dx1_p(pos_p + offset_p) = dx1(p); + i2_p(pos_p + offset_p) = i2(p); + dx2_p(pos_p + offset_p) = dx2(p); + phi_p(pos_p + offset_p) = phi(p); + ux1_p(pos_p + offset_p) = px * pair_fac; + ux2_p(pos_p + offset_p) = py * pair_fac; + ux3_p(pos_p + offset_p) = pz * pair_fac; + weight_p(pos_p + offset_p) = weight(p); + tag_p(pos_p + offset_p) = ParticleTag::alive; + + ux1(p) *= new_fac; + ux2(p) *= new_fac; + ux3(p) *= new_fac; + } + }); + + auto elec_ind_h = Kokkos::create_mirror(elec_ind); + Kokkos::deep_copy(elec_ind_h, elec_ind); + if (s == 0) { + species2_e.set_npart(offset_e + elec_ind_h()); + } else { + species3_e.set_npart(offset_e + elec_ind_h()); + } + auto pos_ind_h = Kokkos::create_mirror(pos_ind); + Kokkos::deep_copy(pos_ind_h, pos_ind); + if (s == 0) { + species2_p.set_npart(offset_p + pos_ind_h()); + } else { + species3_p.set_npart(offset_p + pos_ind_h()); } - } // Ad-hoc PP kernel } - + } // Ad-hoc PP kernel + } }; } // namespace user diff --git a/setups/srpic/magnetosphere/pgen.hpp b/setups/srpic/magnetosphere/pgen.hpp index 681c4d6d1..64fe13cfe 100644 --- a/setups/srpic/magnetosphere/pgen.hpp +++ b/setups/srpic/magnetosphere/pgen.hpp @@ -86,9 +86,13 @@ namespace user { inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { return DriveFields { time, Bsurf, Rstar, Omega }; } + + auto MatchFields(real_t) const -> InitFields { + return InitFields { Bsurf, 
Rstar }; + } }; } // namespace user From 65e6d7794db0f70b01fd9a3045dcf8b5d735feab Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 4 Feb 2025 08:48:55 -0500 Subject: [PATCH 113/124] added siddhant --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 26ca92072..e0d1fe016 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth 🍡 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} +:radio: __Siddhant Solanki__ {[@sidruns30](https://github.com/sidruns30): framework} + 🀷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} 😺 __Muni Zhou__ {[@munizhou](https://github.com/munizhou): PIC} From e644b65600747323d108e88d8ed488ad5f74ad2b Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 9 Mar 2025 10:24:03 -0400 Subject: [PATCH 114/124] nix cfgs --- dev/nix/adios2.nix | 4 +-- dev/nix/kokkos.nix | 17 ++++++------ dev/nix/shell.nix | 66 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 66 insertions(+), 21 deletions(-) diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index 8ec1fd36c..7218f894f 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -1,7 +1,7 @@ { pkgs ? import { }, - hdf5 ? false, - mpi ? false, + hdf5, + mpi, }: let diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix index cfe583c7a..6271604c5 100644 --- a/dev/nix/kokkos.nix +++ b/dev/nix/kokkos.nix @@ -1,11 +1,10 @@ { pkgs ? import { }, - arch ? "native", - gpu ? "none", + arch, + gpu, }: let - gpuUpper = pkgs.lib.toUpper gpu; name = "kokkos"; version = "4.5.01"; compilerPkgs = { @@ -30,10 +29,10 @@ let }; getArch = _: - if gpu != "none" && arch == "native" then + if gpu != "NONE" && arch == "NATIVE" then throw "Please specify an architecture when the GPU support is enabled. 
Available architectures: https://kokkos.org/kokkos-core-wiki/keywords.html#architectures" else - pkgs.lib.toUpper arch; + arch; in pkgs.stdenv.mkDerivation { @@ -41,7 +40,7 @@ pkgs.stdenv.mkDerivation { version = "${version}"; src = pkgs.fetchgit { url = "https://github.com/kokkos/kokkos/"; - rev = "v${version}"; + rev = "${version}"; sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; }; @@ -49,16 +48,16 @@ pkgs.stdenv.mkDerivation { cmake ]; - propagatedBuildInputs = compilerPkgs.${gpuUpper}; + propagatedBuildInputs = compilerPkgs.${gpu}; cmakeFlags = [ "-D CMAKE_CXX_STANDARD=17" "-D CMAKE_CXX_EXTENSIONS=OFF" "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" "-D Kokkos_ARCH_${getArch { }}=ON" - (if gpu != "none" then "-D Kokkos_ENABLE_${gpuUpper}=ON" else "") + (if gpu != "none" then "-D Kokkos_ENABLE_${gpu}=ON" else "") "-D CMAKE_BUILD_TYPE=Release" - ] ++ cmakeFlags.${gpuUpper}; + ] ++ cmakeFlags.${gpu}; enableParallelBuilding = true; } diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 1f21e82b0..01d80298b 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -1,15 +1,45 @@ { pkgs ? import { }, + gpu ? "NONE", + arch ? "NATIVE", + hdf5 ? true, mpi ? false, - hdf5 ? false, - gpu ? "none", - arch ? 
"native", }: let + gpuUpper = pkgs.lib.toUpper gpu; + archUpper = pkgs.lib.toUpper arch; name = "entity-dev"; adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); - kokkosPkg = (pkgs.callPackage ./kokkos.nix { inherit pkgs arch gpu; }); + kokkosPkg = ( + pkgs.callPackage ./kokkos.nix { + inherit pkgs; + arch = archUpper; + gpu = gpuUpper; + } + ); + envVars = { + compiler = rec { + NONE = { + CXX = "g++"; + CC = "gcc"; + }; + HIP = { + CXX = "hipcc"; + CC = "hipcc"; + }; + CUDA = NONE; + }; + kokkos = { + HIP = { + Kokkos_ENABLE_HIP = "ON"; + }; + CUDA = { + Kokkos_ENABLE_CUDA = "ON"; + }; + NONE = { }; + }; + }; in pkgs.mkShell { name = "${name}-env"; @@ -39,11 +69,27 @@ pkgs.mkShell { pkgs.zlib ]); - shellHook = '' - BLUE='\033[0;34m' - NC='\033[0m' + shellHook = + '' + BLUE='\033[0;34m' + NC='\033[0m' + + echo "following environment variables are set:" + '' + + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList ( + category: vars: + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList (name: value: '' + export ${name}=${value} + echo -e " ''\${BLUE}${name}''\${NC}=${value}" + '') vars.${gpuUpper} + ) + ) envVars + ) + + '' + echo "" + echo -e "${name} nix-shell activated" + ''; - echo "" - echo -e "${name} nix-shell activated" - ''; } From 158c385c2e69351ad228eace4862aca425061ec6 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Wed, 21 Aug 2024 20:15:11 -0400 Subject: [PATCH 115/124] Add bulk velocity as moment output. 
--- src/framework/domain/output.cpp | 8 ++++ src/global/enums.h | 5 ++- src/global/tests/enums.cpp | 2 +- src/kernels/particle_moments.hpp | 65 +++++++++++++++++++++++++++++++- src/output/fields.cpp | 3 ++ src/output/fields.h | 4 +- 6 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 6961d2826..8283be825 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -328,6 +328,14 @@ namespace ntt { {}, local_domain->fields.bckp, c); + } else if (fld.id() == FldsID::V) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[0], + local_domain->fields.bckp, + c); } else { raise::Error("Wrong moment requested for output", HERE); } diff --git a/src/global/enums.h b/src/global/enums.h index 8f2495c13..4d2348244 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -288,16 +288,17 @@ namespace ntt { N = 12, Nppc = 13, Custom = 14, + V = 15, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc, Custom }; + A, T, Rho, Charge, N, Nppc, Custom , V}; static constexpr const char* lookup[] = { "e", "dive", "d", "divd", "b", "h", "j", "a", "t", "rho", "charge", "n", - "nppc", "custom" }; + "nppc", "custom", "v" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 7785ec1a3..d5eeb76e2 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -68,7 +68,7 @@ auto main() -> int { enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", "h", "j", "a", "t", "rho", - "charge", "n", "nppc", "custom" }; + "charge", "n", "nppc", "custom" , "v"}; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 0621646ad..3c6cd37c9 100644 --- 
a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -41,7 +41,7 @@ namespace kernel { static constexpr auto D = M::Dim; static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || - (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T), + (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T) || (F == FldsID::V), "Invalid field ID"); const unsigned short c1, c2; @@ -89,7 +89,7 @@ namespace kernel { std::size_t ni2, real_t inv_n0, unsigned short window) - : c1 { (components.size() == 2) ? components[0] + : c1 { (components.size() > 0) ? components[0] : static_cast(0) } , c2 { (components.size() == 2) ? components[1] : static_cast(0) } @@ -205,6 +205,67 @@ namespace kernel { coeff = contrib; } + if constexpr (F == FldsID::V) { + real_t gamma { ZERO }; + // for stress-energy tensor + vec_t u_Phys { ZERO }; + if constexpr (S == SimEngine::SRPIC) { + // SR + // stress-energy tensor for SR is computed in the tetrad (hatted) basis + if constexpr (M::CoordType == Coord::Cart) { + u_Phys[0] = ux1(p); + u_Phys[1] = ux2(p); + u_Phys[2] = ux3(p); + } else { + static_assert(D != Dim::_1D, "non-Cartesian SRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } else { + x_Code[2] = phi(p); + } + metric.template transform_xyz( + x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Phys); + } + if (mass == ZERO) { + gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); + } else { + gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); + } + } else { + // GR + // stress-energy tensor for GR is computed in contravariant basis + static_assert(D != Dim::_1D, "GRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = 
static_cast(i3(p)) + static_cast(dx3(p)); + } + vec_t u_Cntrv { ZERO }; + // compute u_i u^i for energy + metric.template transform(x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Cntrv); + gamma = u_Cntrv[0] * ux1(p) + u_Cntrv[1] * ux2(p) + u_Cntrv[2] * ux3(p); + if (mass == ZERO) { + gamma = math::sqrt(gamma); + } else { + gamma = math::sqrt(ONE + gamma); + } + metric.template transform(x_Code, u_Cntrv, u_Phys); + } + // compute the corresponding moment + coeff = u_Phys[c1 - 1] / gamma; + } else { + // for other cases, use the `contrib` defined above + coeff = contrib; + } + if constexpr (F != FldsID::Nppc) { // for nppc calculation ... // ... do not take volume, weights or smoothing into account diff --git a/src/output/fields.cpp b/src/output/fields.cpp index aa5a752d4..0c2ea5e50 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -44,6 +44,9 @@ namespace out { } else if (id() == FldsID::T) { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); + } else if (id() == FldsID::V) { + // energy-momentum tensor + comp = InterpretComponents({ name.substr(1, 1) }); } else { // scalar (Rho, divE, Custom, etc.) comp = {}; diff --git a/src/output/fields.h b/src/output/fields.h index a520a246d..4fde18ed2 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -43,7 +43,7 @@ namespace out { [[nodiscard]] auto is_moment() const -> bool { return (id() == FldsID::T || id() == FldsID::Rho || id() == FldsID::Nppc || - id() == FldsID::N || id() == FldsID::Charge); + id() == FldsID::N || id() == FldsID::Charge || id() == FldsID::V); } [[nodiscard]] @@ -94,6 +94,8 @@ namespace out { tmp += m_name.substr(1, 2); } else if (id() == FldsID::A) { tmp += "3"; + } else if (id() == FldsID::V) { + tmp += m_name.substr(1, 1); } else if (is_field()) { tmp += "i"; } From 334a8b18de7341586321e0989ffa565852e87ba7 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Mon, 26 Aug 2024 11:46:57 -0400 Subject: [PATCH 116/124] Update input. 
--- input.example.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/input.example.toml b/input.example.toml index 788c30685..a49067811 100644 --- a/input.example.toml +++ b/input.example.toml @@ -340,7 +340,7 @@ # Field quantities to output: # @type: array of strings # @valid: fields: "E", "B", "J", "divE" - # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij" + # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij", "Vi" # @valid: for GR: "D", "H", "divD", "A" # @default: [] # @note: For T, you can use unspecified indices, e.g., Tij, T0i, or specific ones, e.g., Ttt, T00, T02, T23 From d0c98aede75239499743b63c409572cb1cc50d16 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Wed, 16 Oct 2024 08:53:39 -0400 Subject: [PATCH 117/124] Bugfix in moment calculation. --- src/kernels/particle_moments.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 3c6cd37c9..368873cde 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -261,10 +261,7 @@ namespace kernel { } // compute the corresponding moment coeff = u_Phys[c1 - 1] / gamma; - } else { - // for other cases, use the `contrib` defined above - coeff = contrib; - } + } if constexpr (F != FldsID::Nppc) { // for nppc calculation ... 
From e7976ca49c5f3f2e00f4c7316efba5517b5840b0 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 9 Mar 2025 10:57:45 -0400 Subject: [PATCH 118/124] minor bookkeeping for 3vel output --- src/framework/domain/output.cpp | 12 ++++++------ src/global/enums.h | 19 ++++++++++--------- src/global/tests/enums.cpp | 6 +++--- src/output/fields.cpp | 17 ++++++++++++----- src/output/fields.h | 4 +--- 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 8283be825..40871dd2a 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -330,12 +330,12 @@ namespace ntt { c); } else if (fld.id() == FldsID::V) { ComputeMoments(params, - local_domain->mesh, - local_domain->species, - fld.species, - fld.comp[0], - local_domain->fields.bckp, - c); + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[0], + local_domain->fields.bckp, + c); } else { raise::Error("Wrong moment requested for output", HERE); } diff --git a/src/global/enums.h b/src/global/enums.h index 4d2348244..6c6a4ec8f 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -13,7 +13,7 @@ * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, - * a, t, rho, charge, n, nppc, custom + * a, t, rho, charge, n, nppc, v, custom * @namespaces: * - ntt:: * @note Enums of the same type can be compared with each other and with strings @@ -287,18 +287,19 @@ namespace ntt { Charge = 11, N = 12, Nppc = 13, - Custom = 14, - V = 15, + V = 14, + Custom = 15, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc, Custom , V}; - static constexpr const char* lookup[] = { "e", "dive", "d", "divd", - "b", "h", "j", "a", - "t", "rho", "charge", "n", - "nppc", "custom", "v" }; + static constexpr type variants[] = { E, divE, 
D, divD, B, + H, J, A, T, Rho, + Charge, N, Nppc, V, Custom }; + static constexpr const char* lookup[] = { "e", "dive", "d", "divd", + "b", "h", "j", "a", + "t", "rho", "charge", "n", + "nppc", "v", "custom" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index d5eeb76e2..673efaf34 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -66,9 +66,9 @@ auto main() -> int { enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; - enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", - "h", "j", "a", "t", "rho", - "charge", "n", "nppc", "custom" , "v"}; + enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", + "h", "j", "a", "t", "rho", + "charge", "n", "nppc", "v", "custom" }; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/output/fields.cpp b/src/output/fields.cpp index 0c2ea5e50..678ca20f6 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -29,14 +29,24 @@ namespace out { } else { m_id = FldsID::Custom; } + // check compatibility + raise::ErrorIf(is_gr_aux_field() and S != SimEngine::GRPIC, + "GR auxiliary field output not supported for non-GRPIC", + HERE); + raise::ErrorIf(id() == FldsID::A and S != SimEngine::GRPIC, + "Output of A_phi not supported for non-GRPIC", + HERE); + raise::ErrorIf(id() == FldsID::V and S == SimEngine::GRPIC, + "Output of bulk 3-vel not supported for GRPIC", + HERE); // determine the species and components to output if (is_moment()) { species = InterpretSpecies(name); } else { species = {}; } - if (is_field() || is_current()) { - // always write all the field/current components + if (is_field() || is_current() || id() == FldsID::V) { + // always write all the field/current/bulk vel components comp = { { 1 }, { 2 }, { 3 } }; } else if (id() == FldsID::A) { // only write A3 @@ -44,9 +54,6 @@ namespace out { } 
else if (id() == FldsID::T) { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); - } else if (id() == FldsID::V) { - // energy-momentum tensor - comp = InterpretComponents({ name.substr(1, 1) }); } else { // scalar (Rho, divE, Custom, etc.) comp = {}; diff --git a/src/output/fields.h b/src/output/fields.h index 4fde18ed2..0e8e31d08 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -94,9 +94,7 @@ namespace out { tmp += m_name.substr(1, 2); } else if (id() == FldsID::A) { tmp += "3"; - } else if (id() == FldsID::V) { - tmp += m_name.substr(1, 1); - } else if (is_field()) { + } else if (is_field() || id() == FldsID::V) { tmp += "i"; } if (species.size() > 0) { From c9d69ce682e660e7ed0f3c43d414856717094550 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 9 Mar 2025 14:26:41 -0400 Subject: [PATCH 119/124] implemented and tested bulk V output --- src/framework/domain/output.cpp | 69 ++++++++++---- src/kernels/particle_moments.hpp | 154 ++++++++++++++++++++----------- src/output/fields.cpp | 3 - 3 files changed, 151 insertions(+), 75 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 40871dd2a..685099e55 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -328,14 +328,6 @@ namespace ntt { {}, local_domain->fields.bckp, c); - } else if (fld.id() == FldsID::V) { - ComputeMoments(params, - local_domain->mesh, - local_domain->species, - fld.species, - fld.comp[0], - local_domain->fields.bckp, - c); } else { raise::Error("Wrong moment requested for output", HERE); } @@ -365,16 +357,35 @@ namespace ntt { if (fld.is_moment()) { for (auto i = 0; i < 3; ++i) { const auto c = static_cast(addresses[i]); - raise::ErrorIf(fld.comp[i].size() != 2, - "Wrong # of components requested for moment", - HERE); - ComputeMoments(params, - local_domain->mesh, - local_domain->species, - fld.species, - fld.comp[i], - local_domain->fields.bckp, - c); + if 
(fld.id() == FldsID::T) { + raise::ErrorIf(fld.comp[i].size() != 2, + "Wrong # of components requested for moment", + HERE); + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[i], + local_domain->fields.bckp, + c); + } else if (fld.id() == FldsID::V) { + raise::ErrorIf(fld.comp[i].size() != 1, + "Wrong # of components requested for 3vel", + HERE); + if constexpr (S == SimEngine::SRPIC) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[i], + local_domain->fields.bckp, + c); + } else { + raise::Error("Bulk velocity not supported for GRPIC", HERE); + } + } else { + raise::Error("Wrong moment requested for output", HERE); + } } raise::ErrorIf(addresses[1] - addresses[0] != addresses[2] - addresses[1], @@ -383,6 +394,28 @@ namespace ntt { SynchronizeFields(*local_domain, Comm::Bckp, { addresses[0], addresses[2] + 1 }); + if constexpr (S == SimEngine::SRPIC) { + if (fld.id() == FldsID::V) { + // normalize 3vel * rho (combuted above) by rho + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + {}, + local_domain->fields.bckp, + 0u); + SynchronizeFields(*local_domain, Comm::Bckp, { 0, 1 }); + Kokkos::parallel_for("NormalizeVectorByRho", + local_domain->mesh.rangeActiveCells(), + kernel::NormalizeVectorByRho_kernel( + local_domain->fields.bckp, + local_domain->fields.bckp, + 0, + addresses[0], + addresses[1], + addresses[2])); + } + } } else { // copy fields to bckp (:, 0, 1, 2) // if as-is specified ==> copy directly to 3, 4, 5 diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 368873cde..e52c68486 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -14,6 +14,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/comparators.h" #include "utils/error.h" #include "utils/numeric.h" @@ -40,8 +41,10 @@ namespace kernel { static_assert(M::is_metric, "M must 
be a metric class"); static constexpr auto D = M::Dim; - static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || - (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T) || (F == FldsID::V), + static_assert(!((S == SimEngine::GRPIC) && (F == FldsID::V)), + "Bulk velocity not supported for GRPIC"); + static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || (F == FldsID::N) || + (F == FldsID::Nppc) || (F == FldsID::T) || (F == FldsID::V), "Invalid field ID"); const unsigned short c1, c2; @@ -90,7 +93,7 @@ namespace kernel { real_t inv_n0, unsigned short window) : c1 { (components.size() > 0) ? components[0] - : static_cast(0) } + : static_cast(0) } , c2 { (components.size() == 2) ? components[1] : static_cast(0) } , Buff { scatter_buff } @@ -200,68 +203,38 @@ namespace kernel { coeff *= u_Phys[c - 1]; } } - } else { - // for other cases, use the `contrib` defined above - coeff = contrib; - } - - if constexpr (F == FldsID::V) { + } else if constexpr (F == FldsID::V) { real_t gamma { ZERO }; - // for stress-energy tensor + // for bulk 3vel (tetrad basis) vec_t u_Phys { ZERO }; - if constexpr (S == SimEngine::SRPIC) { - // SR - // stress-energy tensor for SR is computed in the tetrad (hatted) basis - if constexpr (M::CoordType == Coord::Cart) { - u_Phys[0] = ux1(p); - u_Phys[1] = ux2(p); - u_Phys[2] = ux3(p); - } else { - static_assert(D != Dim::_1D, "non-Cartesian SRPIC 1D"); - coord_t x_Code { ZERO }; - x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); - x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); - if constexpr (D == Dim::_3D) { - x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); - } else { - x_Code[2] = phi(p); - } - metric.template transform_xyz( - x_Code, - { ux1(p), ux2(p), ux3(p) }, - u_Phys); - } - if (mass == ZERO) { - gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); - } else { - gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); - } + if constexpr (M::CoordType == Coord::Cart) { + u_Phys[0] = ux1(p); + u_Phys[1] 
= ux2(p); + u_Phys[2] = ux3(p); } else { - // GR - // stress-energy tensor for GR is computed in contravariant basis - static_assert(D != Dim::_1D, "GRPIC 1D"); - coord_t x_Code { ZERO }; + coord_t x_Code { ZERO }; x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); if constexpr (D == Dim::_3D) { x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); - } - vec_t u_Cntrv { ZERO }; - // compute u_i u^i for energy - metric.template transform(x_Code, - { ux1(p), ux2(p), ux3(p) }, - u_Cntrv); - gamma = u_Cntrv[0] * ux1(p) + u_Cntrv[1] * ux2(p) + u_Cntrv[2] * ux3(p); - if (mass == ZERO) { - gamma = math::sqrt(gamma); } else { - gamma = math::sqrt(ONE + gamma); + x_Code[2] = phi(p); } - metric.template transform(x_Code, u_Cntrv, u_Phys); + metric.template transform_xyz(x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Phys); + } + if (mass == ZERO) { + gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); + } else { + gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); } // compute the corresponding moment - coeff = u_Phys[c1 - 1] / gamma; - } + coeff = (mass == ZERO ? ONE : mass) * u_Phys[c1 - 1] / gamma; + } else { + // for other cases, use the `contrib` defined above + coeff = contrib; + } if constexpr (F != FldsID::Nppc) { // for nppc calculation ... 
@@ -346,6 +319,79 @@ namespace kernel { } }; + template + class NormalizeVectorByRho_kernel { + const ndfield_t Rho; + ndfield_t Vector; + const unsigned short c_rho, c_v1, c_v2, c_v3; + + public: + NormalizeVectorByRho_kernel(const ndfield_t& rho, + const ndfield_t& vector, + unsigned short crho, + unsigned short cv1, + unsigned short cv2, + unsigned short cv3) + : Rho { rho } + , Vector { vector } + , c_rho { crho } + , c_v1 { cv1 } + , c_v2 { cv2 } + , c_v3 { cv3 } { + raise::ErrorIf(c_rho >= N or c_v1 >= N or c_v2 >= N or c_v3 >= N, + "Invalid component index", + HERE); + raise::ErrorIf(c_rho == c_v1 or c_rho == c_v2 or c_rho == c_v3, + "Invalid component index", + HERE); + raise::ErrorIf(c_v1 == c_v2 or c_v1 == c_v3 or c_v2 == c_v3, + "Invalid component index", + HERE); + } + + Inline void operator()(index_t i1) const { + if constexpr (D == Dim::_1D) { + if (not cmp::AlmostZero(Rho(i1, c_rho))) { + Vector(i1, c_v1) /= Rho(i1, c_rho); + Vector(i1, c_v2) /= Rho(i1, c_rho); + Vector(i1, c_v3) /= Rho(i1, c_rho); + } + } else { + raise::KernelError( + HERE, + "1D implementation of NormalizeVectorByRho_kernel called for non-1D"); + } + } + + Inline void operator()(index_t i1, index_t i2) const { + if constexpr (D == Dim::_2D) { + if (not cmp::AlmostZero(Rho(i1, i2, c_rho))) { + Vector(i1, i2, c_v1) /= Rho(i1, i2, c_rho); + Vector(i1, i2, c_v2) /= Rho(i1, i2, c_rho); + Vector(i1, i2, c_v3) /= Rho(i1, i2, c_rho); + } + } else { + raise::KernelError( + HERE, + "2D implementation of NormalizeVectorByRho_kernel called for non-2D"); + } + } + + Inline void operator()(index_t i1, index_t i2, index_t i3) const { + if constexpr (D == Dim::_3D) { + if (not cmp::AlmostZero(Rho(i1, i2, i3, c_rho))) { + Vector(i1, i2, i3, c_v1) /= Rho(i1, i2, i3, c_rho); + Vector(i1, i2, i3, c_v2) /= Rho(i1, i2, i3, c_rho); + Vector(i1, i2, i3, c_v3) /= Rho(i1, i2, i3, c_rho); + } + } else { + raise::KernelError( + HERE, + "3D implementation of NormalizeVectorByRho_kernel called for non-3D"); + 
} + } + }; + } // namespace kernel #endif // KERNELS_PARTICLE_MOMENTS_HPP diff --git a/src/output/fields.cpp b/src/output/fields.cpp index 678ca20f6..091f04cd9 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -30,9 +30,6 @@ namespace out { m_id = FldsID::Custom; } // check compatibility - raise::ErrorIf(is_gr_aux_field() and S != SimEngine::GRPIC, - "GR auxiliary field output not supported for non-GRPIC", - HERE); raise::ErrorIf(id() == FldsID::A and S != SimEngine::GRPIC, "Output of A_phi not supported for non-GRPIC", HERE); From ebe60809b3ab5021dd690f97f6a94ef6bdfd0d41 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 1 Apr 2025 13:34:59 -0400 Subject: [PATCH 120/124] contribs --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index e0d1fe016..7287d52db 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,14 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth ## Contributors (alphabetical) +🎸 __Ludwig Böss__ {[@LudwigBoess](https://github.com/LudwigBoess): PIC, framework} + 👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} 🍵 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} +🚂 __Evgeny Gorbunov__ {[@Alcauchy](https://github.com/Alcauchy): PIC, framework} + :radio: __Siddhant Solanki__ {[@sidruns30](https://github.com/sidruns30): framework} 🤷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} From 08de6896e6af7761a2ed31f0979cf373435825b9 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 1 Apr 2025 13:36:29 -0400 Subject: [PATCH 121/124] rm tasks (RUNTEST) --- TASKLIST.md | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 TASKLIST.md diff --git a/TASKLIST.md b/TASKLIST.md deleted file mode 100644 index c12f60f4c..000000000 --- a/TASKLIST.md +++ /dev/null @@ -1,9 +0,0 @@ -### Performance improvements to try - -- [ ] removing temporary variables in interpolation -- [ 
] passing by value vs const ref in metric -- [ ] return physical coords one-by-one instead of by passing full vector - -### Things to look into - -1. _h fields in mpi communication From 2132b3333bbc3168d31b5559b52626ead96d66b9 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 1 Apr 2025 13:42:15 -0400 Subject: [PATCH 122/124] bulk grpic prohibit (RUNTEST) --- src/framework/domain/output.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 9cca7cf11..8fb756130 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -329,13 +329,17 @@ namespace ntt { local_domain->fields.bckp, c); } else if (fld.id() == FldsID::V) { - ComputeMoments(params, - local_domain->mesh, - local_domain->species, - fld.species, - fld.comp[0], - local_domain->fields.bckp, - c); + if constexpr (S != SimEngine::GRPIC) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[0], + local_domain->fields.bckp, + c); + } else { + raise::Error("Bulk velocity not supported for GRPIC", HERE); + } } else { raise::Error("Wrong moment requested for output", HERE); } From 0f2c91ea3a5b7b9ed5f6b6134f830e21956c9122 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 1 Apr 2025 13:45:30 -0400 Subject: [PATCH 123/124] minor bug in tests (RUNTEST) --- src/output/tests/writer-mpi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 6ab16305f..0c3000f3a 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -60,7 +60,7 @@ auto main(int argc, char* argv[]) -> int { { // write auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test"); + writer.init(&adios, "hdf5", "test", false); writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, { static_cast(mpi_rank) * nx1 }, { nx1 }, From 
a22cd9684f722a1daf83469c454c46af9a98525e Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 1 Apr 2025 13:48:10 -0400 Subject: [PATCH 124/124] minor bug in mpi-output tests (RUNTEST) --- src/output/tests/writer-mpi.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 0c3000f3a..f6d3ee88a 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -74,13 +74,13 @@ auto main(int argc, char* argv[]) -> int { field_names.push_back(writer.fieldWriters()[0].name(i)); addresses.push_back(i); } - writer.beginWriting(0, 0.0); + writer.beginWriting(WriteMode::Fields, 0, 0.0); writer.writeField(field_names, field, addresses); - writer.endWriting(); + writer.endWriting(WriteMode::Fields); - writer.beginWriting(1, 0.1); + writer.beginWriting(WriteMode::Fields, 1, 0.1); writer.writeField(field_names, field, addresses); - writer.endWriting(); + writer.endWriting(WriteMode::Fields); adios.ExitComputationBlock(); }