From e58be3fedfdd1508fced336a0e4154f5b3478b26 Mon Sep 17 00:00:00 2001 From: StaticObserver Date: Fri, 21 Jun 2024 11:32:43 +0800 Subject: [PATCH 01/28] fix bug in metadomain.cpp; link library stdc++fs --- src/CMakeLists.txt | 3 ++- src/framework/domain/metadomain.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 162d48df9..c15a90795 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,4 +37,5 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} PUBLIC ${libs}) \ No newline at end of file +target_link_libraries(${ENTITY} PUBLIC ${libs}) +target_link_libraries(${ENTITY} PUBLIC stdc++fs) \ No newline at end of file diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 86a9420fd..584bfa551 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -375,7 +375,7 @@ namespace ntt { template void Metadomain::metricCompatibilityCheck() const { const auto dx_min = g_mesh.metric.dxMin(); - auto dx_min_from_domains = INFINITY; + auto dx_min_from_domains = std::numeric_limits::infinity(); for (unsigned int idx { 0 }; idx < g_ndomains; ++idx) { const auto& current_domain = g_subdomains[idx]; const auto current_dx_min = current_domain.mesh.metric.dxMin(); From a98f023247bda5a90b24213b3ac791a07e9fb921 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:42:06 -0400 Subject: [PATCH 02/28] Update CMakeLists.txt putting stdc++fs only where necessary --- src/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c15a90795..38478ad11 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,5 +37,4 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} PUBLIC ${libs}) -target_link_libraries(${ENTITY} PUBLIC stdc++fs) \ No newline at end of file +target_link_libraries(${ENTITY} ${libs}) From 1aa8c8c5394a5de86c6662c4a6d905ff7a7b9f6d Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:43:39 -0400 Subject: [PATCH 03/28] Update CMakeLists.txt needs to be PUBLIC (fixing my own bug) --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38478ad11..5d7f0abb4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,4 +37,4 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} ${libs}) +target_link_libraries(${ENTITY} PUBLIC ${libs}) From 4bd5728dd32ae452a70e6186040963fa8e95966c Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:59:39 -0400 Subject: [PATCH 04/28] added lib --- src/framework/domain/metadomain.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 584bfa551..b13475fd6 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -21,6 +21,7 @@ #include #endif +#include #include #include #include From 6d4880bb8c4ad3956037a451ecaa7ac0964224ba Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 19:12:22 -0400 Subject: [PATCH 05/28] tests for double prec --- src/framework/tests/grid_mesh.cpp | 2 +- src/framework/tests/parameters.cpp | 14 +--------- src/kernels/tests/deposit.cpp | 26 +++++++++--------- src/kernels/tests/particle_moments.cpp | 29 ++++++++++---------- src/kernels/tests/prtl_bc.cpp | 37 +++++++++++++------------- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/src/framework/tests/grid_mesh.cpp b/src/framework/tests/grid_mesh.cpp index 17f734a19..4dea275ce 100644 --- a/src/framework/tests/grid_mesh.cpp +++ b/src/framework/tests/grid_mesh.cpp @@ -39,7 +39,7 @@ auto main(int argc, char* argv[]) -> int { HERE); raise::ErrorIf(mesh.extent(d) != ext[(unsigned short)d], "extent != ext", HERE); } - raise::ErrorIf(not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)0.1154700538), + raise::ErrorIf(not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)(0.2 / std::sqrt(3.0))), "dxMin wrong", HERE); } catch (const std::exception& e) { diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 7b353a542..8f02d1750 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -275,10 +275,6 @@ auto main(int argc, char* argv[]) -> int { fbc.size(), "grid.boundaries.fields.size()"); - assert_equal(params_mink_1d.get("particles.nspec"), - (std::size_t)2, - "particles.nspec"); - const auto species = params_mink_1d.get>( "particles.species"); assert_equal(species[0].label(), "e-", "species[0].label"); @@ -380,10 +376,6 @@ auto main(int argc, char* argv[]) -> int { true, "particles.use_weights"); - assert_equal(params_sph_2d.get("particles.nspec"), - (std::size_t)3, - "particles.nspec"); - assert_equal(params_sph_2d.get("algorithms.gca.e_ovr_b_max"), (real_t)0.95, "algorithms.gca.e_ovr_b_max"); @@ -460,7 +452,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)(0.99), "grid.metric.ks_a"); assert_equal(params_qks_2d.get("grid.metric.ks_rh"), - (real_t)(1.1410673598), + (real_t)((1.0 + std::sqrt(1 - 0.99 * 0.99))), "grid.metric.ks_rh"); const auto expect = std::map { @@ -532,10 +524,6 @@ auto main(int argc, char* argv[]) -> int { defaults::bc::absorb::coeff, "grid.boundaries.absorb.coeff"); - assert_equal(params_qks_2d.get("particles.nspec"), - (std::size_t)2, - "particles.nspec"); - const auto species = params_qks_2d.get>( "particles.species"); assert_equal(species[0].label(), "e-", "species[0].label"); diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 26d2e71eb..02292c643 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -85,17 +85,21 @@ void testDeposit(const std::vector& res, auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const real_t xi = 0.53, xf = 0.47; - const real_t yi = 0.34, yf = 0.52; + const int i0 = 4, j0 = 4; + + const prtldx_t dxi = 0.53, dxf = 0.47; + const prtldx_t dyi = 0.34, dyf = 0.52; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); - const real_t Wx1 = 0.5 * (xi + xr); - const real_t Wx2 = 0.5 * (xf + xr); + const real_t Wx1 = 0.5 * (xi + xr) - (real_t)i0; + const real_t Wx2 = 0.5 * (xf + xr) - (real_t)i0; - const real_t Wy1 = 0.5 * (yi + yr); - const real_t Wy2 = 0.5 * (yf + yr); + const real_t Wy1 = 0.5 * (yi + yr) - (real_t)j0; + const real_t Wy2 = 0.5 * (yf + yr) - (real_t)j0; const real_t Fx1 = (xr - xi); const real_t Fx2 = (xf - xr); @@ -109,16 +113,14 @@ void testDeposit(const std::vector& res, const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - const int i0 = 4, j0 = 4; - put_value(i1, i0, 0); put_value(i2, j0, 0); put_value(i1_prev, i0, 0); put_value(i2_prev, j0, 0); - put_value(dx1, xf, 0); - put_value(dx2, yf, 0); - put_value(dx1_prev, xi, 0); - put_value(dx2_prev, yi, 0); + put_value(dx1, dxf, 0); + put_value(dx2, dyf, 0); + put_value(dx1_prev, dxi, 0); + put_value(dx2_prev, dyi, 0); put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); diff --git a/src/kernels/tests/particle_moments.cpp b/src/kernels/tests/particle_moments.cpp index 29b1c3598..25ed7d4d9 100644 --- a/src/kernels/tests/particle_moments.cpp +++ b/src/kernels/tests/particle_moments.cpp @@ -225,24 +225,22 @@ void testParticleMoments(const std::vector& res, } } } - const real_t gamma_1 = math::sqrt( - ONE + v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]); - const real_t gamma_2 = math::sqrt( - ONE + v2[0] * v2[0] + v2[1] * v2[1] + v2[2] * v2[2]); + const real_t gammaSQR_1 = ONE + v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]; + const real_t gammaSQR_2 = ONE + v2[0] * v2[0] + v2[1] * v2[1] + v2[2] * v2[2]; - const real_t gamma_1_expect = math::sqrt(1.0 + 1.0 + 4.0 + 9.0); - const real_t gamma_2_expect = math::sqrt(1.0 + 9.0 + 4.0 + 1.0); + const real_t gammaSQR_1_expect = 15.0; + const real_t gammaSQR_2_expect = 15.0; - errorIf(not cmp::AlmostEqual(gamma_1, gamma_1_expect, epsilon * acc), - fmt::format("wrong gamma_1 %.8e %.8e for %dD %s", - gamma_1, - gamma_1_expect, + errorIf(not cmp::AlmostEqual_host(gammaSQR_1, gammaSQR_1_expect, epsilon * acc), + fmt::format("wrong gamma_1 %.12e %.12e for %dD %s", + gammaSQR_1, + gammaSQR_1_expect, metric.Dim, metric.Label)); - errorIf(not cmp::AlmostEqual(gamma_2, gamma_2_expect, epsilon * acc), - fmt::format("wrong gamma_2 %.8e %.8e for %dD %s", - gamma_2, - gamma_2_expect, + errorIf(not cmp::AlmostEqual_host(gammaSQR_2, gammaSQR_2_expect, epsilon * acc), + fmt::format("wrong gamma_2 %.12e %.12e for %dD %s", + gammaSQR_2, + gammaSQR_2_expect, metric.Dim, metric.Label)); } @@ -260,7 +258,8 @@ auto main(int argc, char* argv[]) -> int { 10 }, { { 0.0, 10.0 }, { 0.0, 10.0 } }, - {}); + {}, + 10); testParticleMoments>( { diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index a8817c36f..fa1103043 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -266,7 +266,6 @@ void testPeriodicBC(const std::vector& res, metric.template convert_xyz(xCd_1, xPh_1); metric.template convert_xyz(xCd_2, xPh_2); - // printf("t = %f\n", time); if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { xi_1 += dt * ux_1 / gamma_1; xi_2 += dt * ux_2 / gamma_2; @@ -282,12 +281,12 @@ void testPeriodicBC(const std::vector& res, if (xi_2 < extent[0].first) { xi_2 += sx; } - // printf(" x_1 = %.6e | %.6e\n", xPh_1[0], xi_1); - // printf(" x_2 = %.6e | %.6e\n", xPh_2[0], xi_2); - errorIf( - not equal(xPh_1[0], xi_1, fmt::format("xPh_1[0] != xi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[0], xi_2, fmt::format("xPh_2[0] != xi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[0] / sx, + xi_1 / sx, + fmt::format("xPh_1[0] != xi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[0] / sx, + xi_2 / sx, + fmt::format("xPh_2[0] != xi_2 @ t = %f", time))); } if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { yi_1 += dt * uy_1 / gamma_1; @@ -304,12 +303,12 @@ void testPeriodicBC(const std::vector& res, if (yi_2 < extent[1].first) { yi_2 += sy; } - // printf(" y_1 = %.6e | %.6e\n", xPh_1[1], yi_1); - // printf(" y_2 = %.6e | %.6e\n", xPh_2[1], yi_2); - errorIf( - not equal(xPh_1[1], yi_1, fmt::format("xPh_1[1] != yi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[1], yi_2, fmt::format("xPh_2[1] != yi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[1] / sy, + yi_1 / sy, + fmt::format("xPh_1[1] != yi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[1] / sy, + yi_2 / sy, + fmt::format("xPh_2[1] != yi_2 @ t = %f", time))); } if constexpr (M::Dim == Dim::_3D) { zi_1 += dt * uz_1 / gamma_1; @@ -326,12 +325,12 @@ void testPeriodicBC(const std::vector& res, if (zi_2 < extent[2].first) { zi_2 += sz; } - // printf(" z_1 = %.6e | %.6e\n", xPh_1[2], zi_1); - // printf(" z_2 = %.6e | %.6e\n", xPh_2[2], zi_2); - errorIf( - not equal(xPh_1[2], zi_1, fmt::format("xPh_1[2] != zi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[2], zi_2, fmt::format("xPh_2[2] != zi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[2] / sz, + zi_1 / sz, + fmt::format("xPh_1[2] != zi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[2] / sz, + zi_2 / sz, + fmt::format("xPh_2[2] != zi_2 @ t = %f", time))); } time += dt; } From 9cf718f0e73f28d37f0f98ab6cf90f8d6128dea7 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 28 Jun 2024 21:57:55 -0400 Subject: [PATCH 06/28] new pgen for bc test --- setups/tests/srpic_bc/pgen.hpp | 172 ++++++++++++++++++++++++++++ setups/tests/srpic_bc/srpic_bc.toml | 68 +++++++++++ 2 files changed, 240 insertions(+) create mode 100644 setups/tests/srpic_bc/pgen.hpp create mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp new file mode 100644 index 000000000..3a1ee25f5 --- /dev/null +++ b/setups/tests/srpic_bc/pgen.hpp @@ -0,0 +1,172 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct GaussianBlob : public arch::SpatialDistribution { + GaussianBlob(const M& metric, const coord_t& c, real_t s) + : arch::SpatialDistribution { metric } + , spreadSqr { SQR(s) } { + for (auto d = 0u; d < M::Dim; ++d) { + center[d] = c[d]; + } + } + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + // assuming spherical + auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - + TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); + return std::exp(-rSqr / (2 * spreadSqr)); + } + + private: + tuple_t center; + const real_t spreadSqr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = + traits::compatible_with::value; + static constexpr auto dimensions = traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t r0, theta0, dr; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator { p } + , r0 { p.template get("setup.r0") } + , theta0 { p.template get("setup.theta0") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& domain) { + const auto injector = arch::NonUniformInjector( + arch::ColdDist(domain.mesh.metric), + GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), + { 1, 2 }); + arch::InjectNonUniform(params, + domain, + injector, + 1.0, + true); + const auto& metric = domain.mesh.metric; + for (auto& species : domain.species) { + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + Lambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + pld1(p) = x_XYZ[0]; + pld2(p) = x_XYZ[1]; + pld3(p) = x_XYZ[2]; + }); + } + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const auto& metric = domain.mesh.metric; + auto& random_pool = domain.random_pool; + const auto time_dbl = static_cast(time); + for (auto s = 0u; s < domain.species.size(); ++s) { + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + ClassLambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + auto rand_gen = random_pool.get_state(); + if (s == 0) { + const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + + SQR(x_XYZ[1] - pld2(p)) + + SQR(x_XYZ[2] - pld3(p))); + if (time_dbl < 20.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = -0.9; + } else if (time_dbl < 25.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else if (time_dbl < 45.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = 0.9; + } else if (time_dbl < 50.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else { + ux1(p) = 5.0 * (Random(rand_gen) - HALF); + ux2(p) = 5.0 * (Random(rand_gen) - HALF); + ux3(p) = 5.0 * (Random(rand_gen) - HALF); + } + } + // add a slight spread + ux1(p) += 0.002 * (Random(rand_gen) - HALF); + ux2(p) += 0.002 * (Random(rand_gen) - HALF); + ux3(p) += 0.002 * (Random(rand_gen) - HALF); + random_pool.free_state(rand_gen); + }); + } + } + }; + +} // namespace user + +#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml new file mode 100644 index 000000000..7b4aa3b84 --- /dev/null +++ b/setups/tests/srpic_bc/srpic_bc.toml @@ -0,0 +1,68 @@ +[simulation] + name = "srpic_bc" + engine = "srpic" + runtime = 100.0 + +[grid] + resolution = [512, 512] + extent = [[1.0, 30.0]] + + [grid.metric] + metric = "qspherical" + + [grid.boundaries] + fields = [["CONDUCTOR", "CONDUCTOR"]] + particles = [["REFLECT", "REFLECT"]] + +[scales] + larmor0 = 1e-4 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 64.0 + use_weights = true + sort_interval = 100 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + +[setup] + r0 = 10.0 + theta0 = 0.3 + dr = 0.2 + +[output] + format = "hdf5" + + [output.fields] + interval_time = 1.0 + quantities = ["N_1", "N_2", "E", "B", "T00"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + interval = 5 + colored_stdout = true From 0d4db39c1d8eca744931d69de9d721fb6dc5121a Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 1 Jul 2024 05:27:48 +0000 Subject: [PATCH 07/28] linked stdc++fs --- src/global/CMakeLists.txt | 3 ++- src/output/tests/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/global/CMakeLists.txt b/src/global/CMakeLists.txt index 1038bc4b7..d00689283 100644 --- a/src/global/CMakeLists.txt +++ b/src/global/CMakeLists.txt @@ -22,4 +22,5 @@ add_library(ntt_global ${SOURCES}) target_include_directories(ntt_global PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} -) \ No newline at end of file +) +target_link_libraries(ntt_global PRIVATE stdc++fs) \ No newline at end of file diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index 926a3a89e..d33cc6c54 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -17,7 +17,7 @@ function(gen_test title) set (libs ntt_output ntt_global ntt_metrics ntt_framework) add_dependencies(${exec} ${libs}) - target_link_libraries(${exec} PRIVATE ${libs}) + target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") endfunction() From 74ee866e8a72dd75c672da114de56a6edc3aa70f Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 5 Jul 2024 04:35:56 -0400 Subject: [PATCH 08/28] minor bug in srpic::currentampere range --- src/engines/srpic.hpp | 25 ++++++++++++++++++------- src/kernels/ampere_sr.hpp | 8 ++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index ea01f591b..e5fd78929 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -538,11 +538,22 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - range_t range {}; + auto range = domain.mesh.rangeActiveCells(); if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( @@ -572,13 +583,13 @@ namespace ntt { * . . . . . */ if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ +1, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { range = CreateRangePolicy( { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); } } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ +1, 0, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { range = CreateRangePolicy({ domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2), domain.mesh.i_min(in::x3) }, @@ -600,7 +611,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // !TODO: this needs to be done more efficiently + // @TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/kernels/ampere_sr.hpp b/src/kernels/ampere_sr.hpp index 6dde42ff0..e4faec6ce 100644 --- a/src/kernels/ampere_sr.hpp +++ b/src/kernels/ampere_sr.hpp @@ -122,7 +122,8 @@ namespace kernel::sr { */ template class CurrentsAmpere_kernel { - static constexpr auto D = M::Dim; + static constexpr auto D = M::Dim; + static constexpr std::size_t i2min = N_GHOSTS; ndfield_t E; ndfield_t J; @@ -161,9 +162,8 @@ namespace kernel::sr { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - constexpr std::size_t i2min { N_GHOSTS }; - const real_t i1_ { COORD(i1) }; - const real_t i2_ { COORD(i2) }; + const real_t i1_ { COORD(i1) }; + const real_t i2_ { COORD(i2) }; // convert the "curly" current, to contravariant, normalized to // `J0=n0*q0` then add "curly" current with the right coefficient From 959cf5e5c6867852c5bd19e8973806b98e81d791 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 28 Jun 2024 21:57:55 -0400 Subject: [PATCH 09/28] new pgen for bc test --- setups/tests/srpic_bc/pgen.hpp | 172 ++++++++++++++++++++++++++++ setups/tests/srpic_bc/srpic_bc.toml | 68 +++++++++++ 2 files changed, 240 insertions(+) create mode 100644 setups/tests/srpic_bc/pgen.hpp create mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp new file mode 100644 index 000000000..3a1ee25f5 --- /dev/null +++ b/setups/tests/srpic_bc/pgen.hpp @@ -0,0 +1,172 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct GaussianBlob : public arch::SpatialDistribution { + GaussianBlob(const M& metric, const coord_t& c, real_t s) + : arch::SpatialDistribution { metric } + , spreadSqr { SQR(s) } { + for (auto d = 0u; d < M::Dim; ++d) { + center[d] = c[d]; + } + } + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + // assuming spherical + auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - + TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); + return std::exp(-rSqr / (2 * spreadSqr)); + } + + private: + tuple_t center; + const real_t spreadSqr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = + traits::compatible_with::value; + static constexpr auto dimensions = traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t r0, theta0, dr; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator { p } + , r0 { p.template get("setup.r0") } + , theta0 { p.template get("setup.theta0") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& domain) { + const auto injector = arch::NonUniformInjector( + arch::ColdDist(domain.mesh.metric), + GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), + { 1, 2 }); + arch::InjectNonUniform(params, + domain, + injector, + 1.0, + true); + const auto& metric = domain.mesh.metric; + for (auto& species : domain.species) { + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + Lambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + pld1(p) = x_XYZ[0]; + pld2(p) = x_XYZ[1]; + pld3(p) = x_XYZ[2]; + }); + } + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const auto& metric = domain.mesh.metric; + auto& random_pool = domain.random_pool; + const auto time_dbl = static_cast(time); + for (auto s = 0u; s < domain.species.size(); ++s) { + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + ClassLambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + auto rand_gen = random_pool.get_state(); + if (s == 0) { + const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + + SQR(x_XYZ[1] - pld2(p)) + + SQR(x_XYZ[2] - pld3(p))); + if (time_dbl < 20.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = -0.9; + } else if (time_dbl < 25.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else if (time_dbl < 45.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = 0.9; + } else if (time_dbl < 50.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else { + ux1(p) = 5.0 * (Random(rand_gen) - HALF); + ux2(p) = 5.0 * (Random(rand_gen) - HALF); + ux3(p) = 5.0 * (Random(rand_gen) - HALF); + } + } + // add a slight spread + ux1(p) += 0.002 * (Random(rand_gen) - HALF); + ux2(p) += 0.002 * (Random(rand_gen) - HALF); + ux3(p) += 0.002 * (Random(rand_gen) - HALF); + random_pool.free_state(rand_gen); + }); + } + } + }; + +} // namespace user + +#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml new file mode 100644 index 000000000..7b4aa3b84 --- /dev/null +++ b/setups/tests/srpic_bc/srpic_bc.toml @@ -0,0 +1,68 @@ +[simulation] + name = "srpic_bc" + engine = "srpic" + runtime = 100.0 + +[grid] + resolution = [512, 512] + extent = [[1.0, 30.0]] + + [grid.metric] + metric = "qspherical" + + [grid.boundaries] + fields = [["CONDUCTOR", "CONDUCTOR"]] + particles = [["REFLECT", "REFLECT"]] + +[scales] + larmor0 = 1e-4 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 64.0 + use_weights = true + sort_interval = 100 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + +[setup] + r0 = 10.0 + theta0 = 0.3 + dr = 0.2 + +[output] + format = "hdf5" + + [output.fields] + interval_time = 1.0 + quantities = ["N_1", "N_2", "E", "B", "T00"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + interval = 5 + colored_stdout = true From 13c164decf7497d9e7e4723a6c6d1c30e02777f4 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 5 Jul 2024 04:35:56 -0400 Subject: [PATCH 10/28] minor bug in srpic::currentampere range --- src/engines/srpic.hpp | 25 ++++++++++++++++++------- src/kernels/ampere_sr.hpp | 8 ++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index ea01f591b..e5fd78929 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -538,11 +538,22 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - range_t range {}; + auto range = domain.mesh.rangeActiveCells(); if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( @@ -572,13 +583,13 @@ namespace ntt { * . . . . . */ if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ +1, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { range = CreateRangePolicy( { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); } } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ +1, 0, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { range = CreateRangePolicy({ domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2), domain.mesh.i_min(in::x3) }, @@ -600,7 +611,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // !TODO: this needs to be done more efficiently + // @TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/kernels/ampere_sr.hpp b/src/kernels/ampere_sr.hpp index 6dde42ff0..e4faec6ce 100644 --- a/src/kernels/ampere_sr.hpp +++ b/src/kernels/ampere_sr.hpp @@ -122,7 +122,8 @@ namespace kernel::sr { */ template class CurrentsAmpere_kernel { - static constexpr auto D = M::Dim; + static constexpr auto D = M::Dim; + static constexpr std::size_t i2min = N_GHOSTS; ndfield_t E; ndfield_t J; @@ -161,9 +162,8 @@ namespace kernel::sr { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - constexpr std::size_t i2min { N_GHOSTS }; - const real_t i1_ { COORD(i1) }; - const real_t i2_ { COORD(i2) }; + const real_t i1_ { COORD(i1) }; + const real_t i2_ { COORD(i2) }; // convert the "curly" current, to contravariant, normalized to // `J0=n0*q0` then add "curly" current with the right coefficient From f308a4d9f2cace30699e64c76cc23ae613a9880c Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:44:59 -0400 Subject: [PATCH 11/28] const in range policy --- src/framework/domain/grid.cpp | 33 +++++++++++++++++---------------- src/framework/domain/grid.h | 14 +++++++------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/framework/domain/grid.cpp b/src/framework/domain/grid.cpp index d189eaf5d..9302386e1 100644 --- a/src/framework/domain/grid.cpp +++ b/src/framework/domain/grid.cpp @@ -9,19 +9,19 @@ namespace ntt { template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer }; return rangeCells(region); } template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer }; return rangeCells(region); } template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer, CellLayer::allLayer }; @@ -29,19 +29,19 @@ namespace ntt { } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer }; return rangeCells(region); } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer }; return rangeCells(region); } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer, CellLayer::activeLayer }; @@ -49,7 +49,7 @@ namespace ntt { } template - auto Grid::rangeCells(const box_region_t& region) -> range_t { + auto Grid::rangeCells(const box_region_t& region) const -> range_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { switch (region[i]) { @@ -87,7 +87,8 @@ namespace ntt { // !TODO: too ugly, implement a better solution (combine with device) template - auto Grid::rangeCellsOnHost(const box_region_t& region) -> range_h_t { + auto Grid::rangeCellsOnHost(const box_region_t& region) const + -> range_h_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { switch (region[i]) { @@ -123,19 +124,19 @@ namespace ntt { } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer, CellLayer::allLayer }; @@ -143,19 +144,19 @@ namespace ntt { } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer, CellLayer::activeLayer }; @@ -163,7 +164,7 @@ namespace ntt { } template - auto Grid::rangeCells(const tuple_t, D>& ranges) + auto Grid::rangeCells(const tuple_t, D>& ranges) const -> range_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { @@ -182,4 +183,4 @@ namespace ntt { template struct Grid; template struct Grid; -} // namespace ntt \ No newline at end of file +} // namespace ntt diff --git a/src/framework/domain/grid.h b/src/framework/domain/grid.h index 176263189..97a939117 100644 --- a/src/framework/domain/grid.h +++ b/src/framework/domain/grid.h @@ -159,19 +159,19 @@ namespace ntt { * @brief Loop over all active cells (disregard ghost cells) * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeActiveCells() -> range_t; + auto rangeActiveCells() const -> range_t; /** * @brief Loop over all cells * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeAllCells() -> range_t; + auto rangeAllCells() const -> range_t; /** * @brief Pick a particular region of cells * @param box_region_t region of cells to pick: tuple of cellLayer objects * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeCells(const box_region_t&) -> range_t; + auto rangeCells(const box_region_t&) const -> range_t; /** * @brief Pick a particular region of cells * @overload @@ -180,7 +180,7 @@ namespace ntt { * @example {{0, 0}, {0, 0}, {0, 0}} corresponds to allActiveLayer in all 3 dimensions * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeCells(const tuple_t, D>&) -> range_t; + auto rangeCells(const tuple_t, D>&) const -> range_t; /* Ranges in the host execution space ----------------------------------- */ /** @@ -188,20 +188,20 @@ namespace ntt { * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeActiveCellsOnHost() -> range_h_t; + auto rangeActiveCellsOnHost() const -> range_h_t; /** * @brief Loop over all cells * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeAllCellsOnHost() -> range_h_t; + auto rangeAllCellsOnHost() const -> range_h_t; /** * @brief Pick a particular region of cells * @param box_region_t region of cells to pick: tuple of cellLayer objects * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeCellsOnHost(const box_region_t&) -> range_h_t; + auto rangeCellsOnHost(const box_region_t&) const -> range_h_t; protected: std::vector m_resolution; From 09eceb78825c7f65ab628f83a1fbc2cae72576e5 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:45:48 -0400 Subject: [PATCH 12/28] header added --- src/archetypes/particle_injector.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index dd1aa957c..4e75003bb 100644 --- a/src/archetypes/particle_injector.h +++ b/src/archetypes/particle_injector.h @@ -1,13 +1,17 @@ /** * @file archetypes/particle_injector.h - * @brief Particle injector routines - * ... + * @brief Particle injector routines and classes + * @implements + * - arch::UniformInjector<> + * - arch::NonUniformInjector<> + * - arch::AtmosphereInjector<> + * - arch::InjectUniform<> -> void + * - arch::InjectGlobally<> -> void + * - arch::InjectNonUniform<> -> void + * @namespaces: + * - arch:: */ -/* -------------------------------------------------------------------------- */ -/* This header file is still under construction */ -/* -------------------------------------------------------------------------- */ - #ifndef ARCHETYPES_PARTICLE_INJECTOR_H #define ARCHETYPES_PARTICLE_INJECTOR_H From 3b32aaec7ad41850fa9886b38856aa8db1c14b41 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:46:20 -0400 Subject: [PATCH 13/28] precision as print --- src/engines/engine_printer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index d94d00376..90dec3326 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -137,6 +137,7 @@ namespace ntt { const auto hash = std::string(ENTITY_GIT_HASH); const auto pgen = std::string(PGEN); const auto nspec = metadomain.species_params().size(); + const auto precision = (sizeof(real_t) == 4) ? "single" : "double"; #if defined(__clang__) const std::string ccx = "Clang/LLVM " __clang_version__; @@ -219,6 +220,7 @@ namespace ntt { add_param(report, 4, "HDF5", "%s", hdf5_version.c_str()); add_param(report, 4, "Kokkos", "%s", kokkos_version.c_str()); add_param(report, 4, "ADIOS2", "%s", adios2_version.c_str()); + add_param(report, 4, "Precision", "%s", precision); add_param(report, 4, "Debug", "%s", dbg.c_str()); report += "\n"; add_category(report, 4, "Configuration"); From 7bcd17a8026774b59dd63c84b948d819d57b1951 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:47:45 -0400 Subject: [PATCH 14/28] ranges for AXIS bc separate function --- src/engines/srpic.hpp | 84 ++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index e5fd78929..482de8ef1 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -208,6 +208,7 @@ namespace ntt { m_params.template get( "algorithms.timestep.correction") * dt; + auto range = range_with_axis_BCs(domain); if constexpr (M::CoordType == Coord::Cart) { // minkowski case const auto dx = math::sqrt(domain.mesh.metric.template h_<1, 1>({})); @@ -222,15 +223,9 @@ namespace ntt { Kokkos::parallel_for( "Ampere", - domain.mesh.rangeActiveCells(), + range, kernel::mink::Ampere_kernel(domain.fields.em, coeff1, coeff2)); } else { - range_t range {}; - if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for("Ampere", range, @@ -538,24 +533,8 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - auto range = domain.mesh.rangeActiveCells(); - if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } - } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { - range = CreateRangePolicy({ domain.mesh.i_min(in::x1), - domain.mesh.i_min(in::x2), - domain.mesh.i_min(in::x3) }, - { domain.mesh.i_max(in::x1), - domain.mesh.i_max(in::x2) + 1, - domain.mesh.i_max(in::x3) }); - } - } - const auto ni2 = domain.mesh.n_active(in::x2); + auto range = range_with_axis_BCs(domain); + const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( "Ampere", range, @@ -571,34 +550,7 @@ namespace ntt { void CurrentsFilter(domain_t& domain) { logger::Checkpoint("Launching currents filtering kernels", HERE); - range_t range = domain.mesh.rangeActiveCells(); - if constexpr (M::CoordType != Coord::Cart) { - /** - * @brief taking one extra cell in the x2 direction if AXIS BCs - * . . . . . - * . ^= =^ . - * . |* *\*. - * . |* *\*. - * . ^- -^ . - * . . . . . - */ - if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } - } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { - range = CreateRangePolicy({ domain.mesh.i_min(in::x1), - domain.mesh.i_min(in::x2), - domain.mesh.i_min(in::x3) }, - { domain.mesh.i_max(in::x1), - domain.mesh.i_max(in::x2) + 1, - domain.mesh.i_max(in::x3) }); - } - } - } + auto range = range_with_axis_BCs(domain); const auto nfilter = m_params.template get( "algorithms.current_filters"); tuple_t size; @@ -1239,6 +1191,32 @@ namespace ntt { } return { sign, dim, xg_min, xg_max }; } + + auto range_with_axis_BCs(const domain_t& domain) -> range_t { + auto range = domain.mesh.rangeActiveCells(); + if constexpr (M::CoordType != Coord::Cart) { + /** + * @brief taking one extra cell in the x2 direction if AXIS BCs + */ + if constexpr (M::Dim == Dim::_2D) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } + } + } + return range; + } }; } // namespace ntt From 2d2594db2925354a3503cae12eb186f11802f466 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:47:59 -0400 Subject: [PATCH 15/28] rm temp pgen --- setups/tests/srpic_bc/pgen.hpp | 172 ---------------------------- setups/tests/srpic_bc/srpic_bc.toml | 68 ----------- 2 files changed, 240 deletions(-) delete mode 100644 setups/tests/srpic_bc/pgen.hpp delete mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp deleted file mode 100644 index 3a1ee25f5..000000000 --- a/setups/tests/srpic_bc/pgen.hpp +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/numeric.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct GaussianBlob : public arch::SpatialDistribution { - GaussianBlob(const M& metric, const coord_t& c, real_t s) - : arch::SpatialDistribution { metric } - , spreadSqr { SQR(s) } { - for (auto d = 0u; d < M::Dim; ++d) { - center[d] = c[d]; - } - } - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - // assuming spherical - auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - - TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); - return std::exp(-rSqr / (2 * spreadSqr)); - } - - private: - tuple_t center; - const real_t spreadSqr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = - traits::compatible_with::value; - static constexpr auto dimensions = traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t r0, theta0, dr; - - inline PGen(const SimulationParams& p, const Metadomain&) - : arch::ProblemGenerator { p } - , r0 { p.template get("setup.r0") } - , theta0 { p.template get("setup.theta0") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& domain) { - const auto injector = arch::NonUniformInjector( - arch::ColdDist(domain.mesh.metric), - GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), - { 1, 2 }); - arch::InjectNonUniform(params, - domain, - injector, - 1.0, - true); - const auto& metric = domain.mesh.metric; - for (auto& species : domain.species) { - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - Lambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - pld1(p) = x_XYZ[0]; - pld2(p) = x_XYZ[1]; - pld3(p) = x_XYZ[2]; - }); - } - } - - void CustomPostStep(std::size_t, long double time, Domain& domain) { - const auto& metric = domain.mesh.metric; - auto& random_pool = domain.random_pool; - const auto time_dbl = static_cast(time); - for (auto s = 0u; s < domain.species.size(); ++s) { - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - ClassLambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - auto rand_gen = random_pool.get_state(); - if (s == 0) { - const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + - SQR(x_XYZ[1] - pld2(p)) + - SQR(x_XYZ[2] - pld3(p))); - if (time_dbl < 20.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = -0.9; - } else if (time_dbl < 25.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else if (time_dbl < 45.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = 0.9; - } else if (time_dbl < 50.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else { - ux1(p) = 5.0 * (Random(rand_gen) - HALF); - ux2(p) = 5.0 * (Random(rand_gen) - HALF); - ux3(p) = 5.0 * (Random(rand_gen) - HALF); - } - } - // add a slight spread - ux1(p) += 0.002 * (Random(rand_gen) - HALF); - ux2(p) += 0.002 * (Random(rand_gen) - HALF); - ux3(p) += 0.002 * (Random(rand_gen) - HALF); - random_pool.free_state(rand_gen); - }); - } - } - }; - -} // namespace user - -#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml deleted file mode 100644 index 7b4aa3b84..000000000 --- a/setups/tests/srpic_bc/srpic_bc.toml +++ /dev/null @@ -1,68 +0,0 @@ -[simulation] - name = "srpic_bc" - engine = "srpic" - runtime = 100.0 - -[grid] - resolution = [512, 512] - extent = [[1.0, 30.0]] - - [grid.metric] - metric = "qspherical" - - [grid.boundaries] - fields = [["CONDUCTOR", "CONDUCTOR"]] - particles = [["REFLECT", "REFLECT"]] - -[scales] - larmor0 = 1e-4 - skindepth0 = 0.01 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 64.0 - use_weights = true - sort_interval = 100 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - -[setup] - r0 = 10.0 - theta0 = 0.3 - dr = 0.2 - -[output] - format = "hdf5" - - [output.fields] - interval_time = 1.0 - quantities = ["N_1", "N_2", "E", "B", "T00"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - interval = 5 - colored_stdout = true From 260bfd5e7a8bfbe9d943c2e60018a982f22a4e35 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:48:18 -0400 Subject: [PATCH 16/28] potential race condition fixed --- src/kernels/injectors.hpp | 48 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index ae8440342..7ad945ad5 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -568,21 +568,21 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; - i2s_1(index + offset1 - p) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1 - p) = dx2; - i2s_2(index + offset2 - p) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2 - p) = dx2; + i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; + dx2s_1(index + offset1) = dx2; + i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; + dx2s_2(index + offset2) = dx2; vec_t v_T { ZERO }, v_Cd { ZERO }; energy_dist(x_Ph, v_T, spidx1); @@ -591,28 +591,28 @@ namespace kernel { } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd_, v_T, v_Cd); } - ux1s_1(index + offset1 - p) = v_Cd[0]; - ux2s_1(index + offset1 - p) = v_Cd[1]; - ux3s_1(index + offset1 - p) = v_Cd[2]; + ux1s_1(index + offset1) = v_Cd[0]; + ux2s_1(index + offset1) = v_Cd[1]; + ux3s_1(index + offset1) = v_Cd[2]; energy_dist(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd_, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd_, v_T, v_Cd); } - ux1s_2(index + offset2 - p) = v_Cd[0]; - ux2s_2(index + offset2 - p) = v_Cd[1]; - ux3s_2(index + offset2 - p) = v_Cd[2]; + ux1s_2(index + offset2) = v_Cd[0]; + ux2s_2(index + offset2) = v_Cd[1]; + ux3s_2(index + offset2) = v_Cd[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { const auto wei = metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF }) * inv_V0; - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); From 7d4d3052421fb7e752087f6c160ad7deccdd3dc0 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:51:48 -0400 Subject: [PATCH 17/28] culprit fixed for 1D and 3D too --- src/kernels/injectors.hpp | 98 +++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index 7ad945ad5..9d3fd7d81 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -512,37 +512,37 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; vec_t v_T { ZERO }, v_XYZ { ZERO }; energy_dist(x_Ph, v_T, spidx1); metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_1(index + offset1 - p) = v_XYZ[0]; - ux2s_1(index + offset1 - p) = v_XYZ[1]; - ux3s_1(index + offset1 - p) = v_XYZ[2]; + ux1s_1(index + offset1) = v_XYZ[0]; + ux2s_1(index + offset1) = v_XYZ[1]; + ux3s_1(index + offset1) = v_XYZ[2]; energy_dist(x_Ph, v_T, spidx2); metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_2(index + offset2 - p) = v_XYZ[0]; - ux2s_2(index + offset2 - p) = v_XYZ[1]; - ux3s_2(index + offset2 - p) = v_XYZ[2]; + ux1s_2(index + offset2) = v_XYZ[0]; + ux2s_2(index + offset2) = v_XYZ[1]; + ux3s_2(index + offset2) = v_XYZ[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { - const auto wei = metric.sqrt_det_h({ i1_ + HALF }); - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + const auto wei = metric.sqrt_det_h({ i1_ + HALF }) * inv_V0; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); @@ -635,27 +635,27 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); - const auto dx3 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); + const auto dx3 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; - i2s_1(index + offset1 - p) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1 - p) = dx2; - i2s_2(index + offset2 - p) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2 - p) = dx2; + i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; + dx2s_1(index + offset1) = dx2; + i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; + dx2s_2(index + offset2) = dx2; - i3s_1(index + offset1 - p) = static_cast(i3) - N_GHOSTS; - dx3s_1(index + offset1 - p) = dx3; - i3s_2(index + offset2 - p) = static_cast(i3) - N_GHOSTS; - dx3s_2(index + offset2 - p) = dx3; + i3s_1(index + offset1) = static_cast(i3) - N_GHOSTS; + dx3s_1(index + offset1) = dx3; + i3s_2(index + offset2) = static_cast(i3) - N_GHOSTS; + dx3s_2(index + offset2) = dx3; vec_t v_T { ZERO }, v_Cd { ZERO }; energy_dist(x_Ph, v_T, spidx1); @@ -664,30 +664,30 @@ namespace kernel { } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd, v_T, v_Cd); } - ux1s_1(index + offset1 - p) = v_Cd[0]; - ux2s_1(index + offset1 - p) = v_Cd[1]; - ux3s_1(index + offset1 - p) = v_Cd[2]; + ux1s_1(index + offset1) = v_Cd[0]; + ux2s_1(index + offset1) = v_Cd[1]; + ux3s_1(index + offset1) = v_Cd[2]; energy_dist(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd, v_T, v_Cd); } - ux1s_2(index + offset2 - p) = v_Cd[0]; - ux2s_2(index + offset2 - p) = v_Cd[1]; - ux3s_2(index + offset2 - p) = v_Cd[2]; + ux1s_2(index + offset2) = v_Cd[0]; + ux2s_2(index + offset2) = v_Cd[1]; + ux3s_2(index + offset2) = v_Cd[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { const auto wei = metric.sqrt_det_h( { i1_ + HALF, i2_ + HALF, i3_ + HALF }) * inv_V0; - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); From e8a753696d338d6abce960dc30170878b27a3d51 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:54:54 -0400 Subject: [PATCH 18/28] srpic_bc rm --- setups/tests/srpic_bc/pgen.hpp | 172 ---------------------------- setups/tests/srpic_bc/srpic_bc.toml | 68 ----------- 2 files changed, 240 deletions(-) delete mode 100644 setups/tests/srpic_bc/pgen.hpp delete mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp deleted file mode 100644 index 3a1ee25f5..000000000 --- a/setups/tests/srpic_bc/pgen.hpp +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/numeric.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct GaussianBlob : public arch::SpatialDistribution { - GaussianBlob(const M& metric, const coord_t& c, real_t s) - : arch::SpatialDistribution { metric } - , spreadSqr { SQR(s) } { - for (auto d = 0u; d < M::Dim; ++d) { - center[d] = c[d]; - } - } - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - // assuming spherical - auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - - TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); - return std::exp(-rSqr / (2 * spreadSqr)); - } - - private: - tuple_t center; - const real_t spreadSqr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = - traits::compatible_with::value; - static constexpr auto dimensions = traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t r0, theta0, dr; - - inline PGen(const SimulationParams& p, const Metadomain&) - : arch::ProblemGenerator { p } - , r0 { p.template get("setup.r0") } - , theta0 { p.template get("setup.theta0") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& domain) { - const auto injector = arch::NonUniformInjector( - arch::ColdDist(domain.mesh.metric), - GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), - { 1, 2 }); - arch::InjectNonUniform(params, - domain, - injector, - 1.0, - true); - const auto& metric = domain.mesh.metric; - for (auto& species : domain.species) { - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - Lambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - pld1(p) = x_XYZ[0]; - pld2(p) = x_XYZ[1]; - pld3(p) = x_XYZ[2]; - }); - } - } - - void CustomPostStep(std::size_t, long double time, Domain& domain) { - const auto& metric = domain.mesh.metric; - auto& random_pool = domain.random_pool; - const auto time_dbl = static_cast(time); - for (auto s = 0u; s < domain.species.size(); ++s) { - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - ClassLambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - auto rand_gen = random_pool.get_state(); - if (s == 0) { - const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + - SQR(x_XYZ[1] - pld2(p)) + - SQR(x_XYZ[2] - pld3(p))); - if (time_dbl < 20.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = -0.9; - } else if (time_dbl < 25.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else if (time_dbl < 45.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = 0.9; - } else if (time_dbl < 50.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else { - ux1(p) = 5.0 * (Random(rand_gen) - HALF); - ux2(p) = 5.0 * (Random(rand_gen) - HALF); - ux3(p) = 5.0 * (Random(rand_gen) - HALF); - } - } - // add a slight spread - ux1(p) += 0.002 * (Random(rand_gen) - HALF); - ux2(p) += 0.002 * (Random(rand_gen) - HALF); - ux3(p) += 0.002 * (Random(rand_gen) - HALF); - random_pool.free_state(rand_gen); - }); - } - } - }; - -} // namespace user - -#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml deleted file mode 100644 index 7b4aa3b84..000000000 --- a/setups/tests/srpic_bc/srpic_bc.toml +++ /dev/null @@ -1,68 +0,0 @@ -[simulation] - name = "srpic_bc" - engine = "srpic" - runtime = 100.0 - -[grid] - resolution = [512, 512] - extent = [[1.0, 30.0]] - - [grid.metric] - metric = "qspherical" - - [grid.boundaries] - fields = [["CONDUCTOR", "CONDUCTOR"]] - particles = [["REFLECT", "REFLECT"]] - -[scales] - larmor0 = 1e-4 - skindepth0 = 0.01 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 64.0 - use_weights = true - sort_interval = 100 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - -[setup] - r0 = 10.0 - theta0 = 0.3 - dr = 0.2 - -[output] - format = "hdf5" - - [output.fields] - interval_time = 1.0 - quantities = ["N_1", "N_2", "E", "B", "T00"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - interval = 5 - colored_stdout = true From 6bcb4c22d4438070b5bd221a3f3d343701218abc Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 02:29:52 -0400 Subject: [PATCH 19/28] rm const in metric tests --- src/metrics/tests/coord_trans.cpp | 4 ++-- src/metrics/tests/ks-qks.cpp | 4 ++-- src/metrics/tests/minkowski.cpp | 4 ++-- src/metrics/tests/sph-qsph.cpp | 4 ++-- src/metrics/tests/sr-cart-sph.cpp | 4 ++-- src/metrics/tests/vec_trans.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/metrics/tests/coord_trans.cpp b/src/metrics/tests/coord_trans.cpp index 027b7677a..285621e16 100644 --- a/src/metrics/tests/coord_trans.cpp +++ b/src/metrics/tests/coord_trans.cpp @@ -29,7 +29,7 @@ template Inline auto equal(const coord_t& a, const coord_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -189,4 +189,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/ks-qks.cpp b/src/metrics/tests/ks-qks.cpp index 195465701..f7f20b94f 100644 --- a/src/metrics/tests/ks-qks.cpp +++ b/src/metrics/tests/ks-qks.cpp @@ -23,7 +23,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -182,4 +182,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/minkowski.cpp b/src/metrics/tests/minkowski.cpp index 8fce450e9..1ef27b4fa 100644 --- a/src/metrics/tests/minkowski.cpp +++ b/src/metrics/tests/minkowski.cpp @@ -19,7 +19,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); template -Inline auto equal(const coord_t& a, const coord_t& b, const real_t acc = ONE) +Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) -> bool { for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], epsilon * acc)) { @@ -111,4 +111,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/sph-qsph.cpp b/src/metrics/tests/sph-qsph.cpp index 13b1101a4..230a763e1 100644 --- a/src/metrics/tests/sph-qsph.cpp +++ b/src/metrics/tests/sph-qsph.cpp @@ -23,7 +23,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -133,4 +133,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index 126dea027..fd3f7351d 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -26,7 +26,7 @@ template Inline auto equal(const coord_t& a, const coord_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -156,4 +156,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/vec_trans.cpp b/src/metrics/tests/vec_trans.cpp index a249c1d41..fb041d9df 100644 --- a/src/metrics/tests/vec_trans.cpp +++ b/src/metrics/tests/vec_trans.cpp @@ -29,7 +29,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { From c64c5a296097be3df4645a8aa68d764c581e04da Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 02:30:43 -0400 Subject: [PATCH 20/28] rm const in kernel tests + uint->int --- src/kernels/particle_moments.hpp | 4 ++-- src/kernels/tests/ampere_mink.cpp | 5 ++--- src/kernels/tests/deposit.cpp | 10 ++++------ src/kernels/tests/faraday_mink.cpp | 5 ++--- src/kernels/tests/prtl_bc.cpp | 3 +-- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 5776fad38..8b668a036 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -57,7 +57,7 @@ namespace kernel { const float charge; const bool use_weights; const M metric; - const std::size_t ni2; + const int ni2; const real_t inv_n0; const unsigned short window; @@ -111,7 +111,7 @@ namespace kernel { , charge { charge } , use_weights { use_weights } , metric { metric } - , ni2 { ni2 } + , ni2 { static_cast(ni2) } , inv_n0 { inv_n0 } , window { window } , contrib { get_contrib(mass, charge) } diff --git a/src/kernels/tests/ampere_mink.cpp b/src/kernels/tests/ampere_mink.cpp index a7e2594a0..80af88fa5 100644 --- a/src/kernels/tests/ampere_mink.cpp +++ b/src/kernels/tests/ampere_mink.cpp @@ -23,8 +23,7 @@ void errorIf(bool condition, const std::string& message) { } } -Inline auto equal(const real_t& a, const real_t& b, const char* msg, const real_t acc) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { if (not(math::abs(a - b) < acc)) { printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); return false; @@ -368,4 +367,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 02292c643..9a8ae1cc6 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,10 +29,8 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(const real_t& a, - const real_t& b, - const char* msg = "", - const real_t acc = ONE) -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) + -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -89,8 +87,8 @@ void testDeposit(const std::vector& res, const prtldx_t dxi = 0.53, dxf = 0.47; const prtldx_t dyi = 0.34, dyf = 0.52; - const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; - const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); diff --git a/src/kernels/tests/faraday_mink.cpp b/src/kernels/tests/faraday_mink.cpp index e11c8d1ff..74c2b9b1a 100644 --- a/src/kernels/tests/faraday_mink.cpp +++ b/src/kernels/tests/faraday_mink.cpp @@ -23,8 +23,7 @@ void errorIf(bool condition, const std::string& message) { } } -Inline auto equal(const real_t& a, const real_t& b, const char* msg, const real_t acc) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { if (not(math::abs(a - b) < acc)) { printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); return false; @@ -344,4 +343,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index fa1103043..c8f9eae04 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -27,8 +27,7 @@ void errorIf(bool condition, const std::string& message = "") { } } -Inline auto equal(const real_t& a, const real_t& b, const std::string& msg) - -> bool { +Inline auto equal(real_t a, real_t b, const std::string& msg) -> bool { if (not(math::abs(a - b) < 1e-4)) { printf("%.12e != %.12e %s\n", a, b, msg.c_str()); return false; From 358e1eeccf5ef46f5a5b8369386dc47c5ad921eb Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:40:43 -0400 Subject: [PATCH 21/28] rocm tests --- src/metrics/tests/coord_trans.cpp | 2 +- src/metrics/tests/ks-qks.cpp | 52 ++++++++++++++++++++----------- src/metrics/tests/sr-cart-sph.cpp | 2 +- src/metrics/tests/vec_trans.cpp | 50 ++++++++++++++--------------- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/src/metrics/tests/coord_trans.cpp b/src/metrics/tests/coord_trans.cpp index 285621e16..f3779a852 100644 --- a/src/metrics/tests/coord_trans.cpp +++ b/src/metrics/tests/coord_trans.cpp @@ -149,7 +149,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); + testMetric>(res3d, ext3dcart, 500); testMetric>(res2d, extsph, 10); testMetric>(res2d, extsph, 100, params); diff --git a/src/metrics/tests/ks-qks.cpp b/src/metrics/tests/ks-qks.cpp index f7f20b94f..bed051e16 100644 --- a/src/metrics/tests/ks-qks.cpp +++ b/src/metrics/tests/ks-qks.cpp @@ -27,7 +27,6 @@ Inline auto equal(const vec_t& a, const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { - printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; } } @@ -64,13 +63,12 @@ void testMetric(const std::vector& res, npts *= res[d]; } - unsigned long all_wrongs = 0; - const auto rg = metric.rg(); - const auto a = metric.spin(); - Kokkos::parallel_reduce( + const auto rg = metric.rg(); + const auto a = metric.spin(); + Kokkos::parallel_for( "h_ij/hij", npts, - Lambda(index_t n, unsigned long& wrongs) { + Lambda(index_t n) { tuple_t idx; unravel(n, idx, res_tup); coord_t x_Code { ZERO }; @@ -138,17 +136,35 @@ void testMetric(const std::vector& res, vec_t hij_expect { h11_expect, h22_expect, h33_expect }; vec_t h_ij_expect { h_11_expect, h_22_expect, h_33_expect }; - wrongs += not equal(h_ij_predict, h_ij_expect, "h_ij", acc); - wrongs += not equal(h_13_predict, { h_13_expect }, "h_13", acc); - wrongs += not equal(hij_predict, hij_expect, "hij", acc); - wrongs += not equal(h13_predict, { h13_expect }, "h13", acc); - }, - all_wrongs); - - errorIf(all_wrongs != 0, - "wrong h_ij/hij for " + std::to_string(M::Dim) + "D " + - std::string(metric.Label) + " with " + std::to_string(all_wrongs) + - " errors"); + if (not equal(h_ij_predict, h_ij_expect, "h_ij", acc)) { + printf("h_ij: %.12e %.12e %.12e : %.12e %.12e %.12e\n", + h_ij_predict[0], + h_ij_predict[1], + h_ij_predict[2], + h_ij_expect[0], + h_ij_expect[1], + h_ij_expect[2]); + Kokkos::abort("h_ij"); + } + if (not equal(h_13_predict, { h_13_expect }, "h_13", acc)) { + printf("h_13: %.12e : %.12e\n", h_13_predict[0], h_13_expect); + Kokkos::abort("h_13"); + } + if (not equal(hij_predict, hij_expect, "hij", acc)) { + printf("hij: %.12e %.12e %.12e : %.12e %.12e %.12e\n", + hij_predict[0], + hij_predict[1], + hij_predict[2], + hij_expect[0], + hij_expect[1], + hij_expect[2]); + Kokkos::abort("hij"); + } + if (not equal(h13_predict, { h13_expect }, "h13", acc)) { + printf("h13: %.12e : %.12e\n", h13_predict[0], h13_expect); + Kokkos::abort("h13"); + } + }); } auto main(int argc, char* argv[]) -> int { @@ -168,7 +184,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>( { - 64, + 32, 42 }, { { 0.8, 10.0 }, { 0.0, constant::PI } }, diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index fd3f7351d..ec2f6ddc0 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -145,7 +145,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); + testMetric>(res3d, ext3dcart, 500); testMetric>(res2d, extsph, 10); testMetric>(res2d, extsph, 200, params); diff --git a/src/metrics/tests/vec_trans.cpp b/src/metrics/tests/vec_trans.cpp index fb041d9df..31015115c 100644 --- a/src/metrics/tests/vec_trans.cpp +++ b/src/metrics/tests/vec_trans.cpp @@ -199,24 +199,24 @@ auto main(int argc, char* argv[]) -> int { { "h", (real_t)0.25} }; - testMetric>({ 128 }, ext1dcart); - testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); - testMetric>(res2d, extsph, 10); - testMetric>(res2d, extsph, 10, params); - - const auto resks = std::vector { 64, 54 }; - const auto extsgr = boundaries_t { - {0.8, 50.0}, - {0.0, constant::PI} - }; - const auto paramsks = std::map { - {"r0", -TWO}, - { "h", ZERO}, - { "a", (real_t)0.95} - }; - testMetric>(resks, extsgr, 150, paramsks); - + // testMetric>({ 128 }, ext1dcart); + // testMetric>(res2d, ext2dcart, 200); + // testMetric>(res3d, ext3dcart, 200); + // testMetric>(res2d, extsph, 10); + // testMetric>(res2d, extsph, 10, params); + + // const auto resks = std::vector { 64, 54 }; + // const auto extsgr = boundaries_t { + // {0.8, 50.0}, + // {0.0, constant::PI} + // }; + // const auto paramsks = std::map { + // {"r0", -TWO}, + // { "h", ZERO}, + // { "a", (real_t)0.95} + // }; + // testMetric>(resks, extsgr, 150, paramsks); + // const auto resqks = std::vector { 64, 42 }; const auto extqks = boundaries_t { {0.8, 10.0}, @@ -228,13 +228,13 @@ auto main(int argc, char* argv[]) -> int { { "a", (real_t)0.8} }; testMetric>(resqks, extqks, 500, paramsqks); - - const auto resks0 = std::vector { 64, 54 }; - const auto extks0 = boundaries_t { - {0.5, 20.0}, - {0.0, constant::PI} - }; - testMetric>(resks0, extks0, 10); + // + // const auto resks0 = std::vector { 64, 54 }; + // const auto extks0 = boundaries_t { + // {0.5, 20.0}, + // {0.0, constant::PI} + // }; + // testMetric>(resks0, extks0, 10); } catch (std::exception& e) { std::cerr << e.what() << std::endl; From ac5389b1158f514180f8e4fd1627d6b63e817986 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:42:00 -0400 Subject: [PATCH 22/28] minor --- src/engines/srpic.hpp | 2 +- src/framework/domain/comm_mpi.hpp | 213 ------------------------------ 2 files changed, 1 insertion(+), 214 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 482de8ef1..bddc557c9 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -563,7 +563,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // @TODO: this needs to be done more efficiently + // !TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2854599ef..63dd8271a 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -55,9 +55,6 @@ namespace comm { if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self - // raise::ErrorIf((recv_idx != idx) || (send_idx != idx), - // "Cannot send to self and receive from another domain", - // HERE); if (not additive) { // simply filling the ghost cells if constexpr (D == Dim::_1D) { @@ -446,214 +443,4 @@ namespace comm { } // namespace comm -/** - * attempt to receive directly into fields - */ -// template -// auto to_tuple_impl(const Array& a, std::index_sequence) { -// return std::make_tuple(a[I]...); -// } - -// template > auto to_tuple(const std::array& a) { -// return to_tuple_impl(a, Indices {}); -// } - -// template -// inline void CommunicateField(unsigned int idx, -// ndfield_t& fld, -// unsigned int send_idx, -// unsigned int recv_idx, -// int send_rank, -// int recv_rank, -// const std::vector& send_slice, -// const std::vector& recv_slice, -// const range_tuple_t& comps, -// bool additive) { -// raise::ErrorIf(send_rank < 0 && recv_rank < 0, -// "CommunicateField called with negative ranks", -// HERE); - -// int rank; -// MPI_Comm_rank(MPI_COMM_WORLD, &rank); - -// raise::ErrorIf( -// (send_rank == rank && send_idx != idx) || -// (recv_rank == rank && recv_idx != idx), -// "Multiple-domain single-rank communication not yet implemented", -// HERE); - -// // trivial copy if sending to self and receiving from self -// if ((send_idx == idx) || (recv_idx == idx)) { -// raise::ErrorIf((recv_idx != idx) || (send_idx != idx), -// "Cannot send to self and receive from another domain", -// HERE); -// // sending/recv to/from self -// if constexpr (D == Dim::_1D) { -// Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), -// Kokkos::subview(fld, send_slice[0], comps)); -// } else if constexpr (D == Dim::_2D) { -// Kokkos::deep_copy( -// Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), -// Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); -// } else if constexpr (D == Dim::_3D) { -// Kokkos::deep_copy( -// Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), -// Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); -// } -// return; -// } - -// std::size_t nsend { comps.second - comps.first }, -// nrecv { comps.second - comps.first }; -// // ndarray_t(D) + 1> send_fld, recv_fld; - -// for (short d { 0 }; d < (short)D; ++d) { -// if (send_rank >= 0) { -// nsend *= (send_slice[d].second - send_slice[d].first); -// } -// if (recv_rank >= 0) { -// nrecv *= (recv_slice[d].second - recv_slice[d].first); -// } -// } - -// std::vector> send_init_list; -// std::vector> recv_init_list; - -// if (send_rank >= 0) { -// for (unsigned short d = 0; d < D; ++d) { -// send_init_list.push_back({ send_slice[d].first, send_slice[d].second }); -// recv_init_list.push_back({ recv_slice[d].first, recv_slice[d].second }); -// } -// send_init_list.push_back({ comps.first, comps.second }); -// recv_init_list.push_back({ comps.first, comps.second }); -// } - -// std::array, D + 1> send_array; -// std::copy(send_init_list.begin(), send_init_list.end(), send_array.begin()); - -// std::array, D + 1> recv_array; -// std::copy(recv_init_list.begin(), recv_init_list.end(), recv_array.begin()); - -// if (send_rank >= 0 && recv_rank >= 0) { -// auto send_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(send_array)); -// auto recv_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(recv_array)); - -// MPI_Sendrecv(send_subview.data(), -// nsend, -// mpi::get_type(), -// send_rank, -// 0, -// recv_subview.data(), -// nrecv, -// mpi::get_type(), -// recv_rank, -// 0, -// MPI_COMM_WORLD, -// MPI_STATUS_IGNORE); -// } else if (send_rank >= 0) { -// auto send_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(send_array)); - -// MPI_Send(send_subview.data(), -// nsend, -// mpi::get_type(), -// send_rank, -// 0, -// MPI_COMM_WORLD); -// } else if (recv_rank >= 0) { -// auto recv_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(recv_array)); - -// MPI_Recv(recv_subview.data(), -// nrecv, -// mpi::get_type(), -// recv_rank, -// 0, -// MPI_COMM_WORLD, -// MPI_STATUS_IGNORE); -// } else { -// raise::Error("CommunicateField called with negative ranks", HERE); -// } -// if (recv_rank >= 0) { -// // // !TODO: perhaps directly recv to the fld? -// // if (not additive) { -// // if constexpr (D == Dim::_1D) { -// // Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), recv_fld); -// // } else if constexpr (D == Dim::_2D) { -// // Kokkos::deep_copy( -// // Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), -// // recv_fld); -// // } else if constexpr (D == Dim::_3D) { -// // Kokkos::deep_copy( -// // Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), -// // recv_fld); -// // } -// // } else { -// // if constexpr (D == Dim::_1D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, comps.first }, -// // { recv_slice[0].second, comps.second }), -// // Lambda(index_t i1, index_t ci) { -// // fld(i1, ci) += recv_fld(i1 - offset_x1, ci - offset_c); -// // }); -// // } else if constexpr (D == Dim::_2D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_x2 = recv_slice[1].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, recv_slice[1].first, comps.first }, -// // { recv_slice[0].second, recv_slice[1].second, comps.second }), -// // Lambda(index_t i1, index_t i2, index_t ci) { -// // fld(i1, i2, ci) += recv_fld(i1 - offset_x1, -// // i2 - offset_x2, -// // ci - offset_c); -// // }); -// // } else if constexpr (D == Dim::_3D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_x2 = recv_slice[1].first; -// // const auto offset_x3 = recv_slice[2].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, -// // recv_slice[1].first, -// // recv_slice[2].first, -// // comps.first }, -// // { recv_slice[0].second, -// // recv_slice[1].second, -// // recv_slice[2].second, -// // comps.second }), -// // Lambda(index_t i1, index_t i2, index_t i3, index_t ci) { -// // fld(i1, i2, i3, ci) += recv_fld(i1 - offset_x1, -// // i2 - offset_x2, -// // i3 - offset_x3, -// // ci - offset_c); -// // }); -// // } -// // } -// } -// } - #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP From fabeaa98e20ff281f693758670d44989cc05ca1c Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:43:23 -0400 Subject: [PATCH 23/28] extern + docker --- docker-compose.yml | 2 +- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/toml11 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3de0516a3..cb0093f4b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: entity-rocm: container_name: entity_rocm devices: - - "/dev/fdk" + - "/dev/kfd" - "/dev/dri" security_opt: - seccomp:unconfined diff --git a/extern/Kokkos b/extern/Kokkos index 486cc745c..eb11070f6 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 486cc745cb9a287f3915061455105a3ee588c616 +Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 diff --git a/extern/adios2 b/extern/adios2 index c52d755bb..b8761e2af 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit c52d755bb2ef1730a4a93b9243e2414e5a2d8456 +Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 diff --git a/extern/toml11 b/extern/toml11 index 85faca9cb..12c0f379f 160000 --- a/extern/toml11 +++ b/extern/toml11 @@ -1 +1 @@ -Subproject commit 85faca9cbe8d76324ff38c1801be44c63e12d5be +Subproject commit 12c0f379f2e865b4ce984758d5ae004f9de07d69 From 4ff47edefafe0880d693ae487480773fa29dfc2b Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 04:00:33 -0400 Subject: [PATCH 24/28] action upd --- .github/workflows/actions.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index c9a7a314b..c639ca298 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -11,23 +11,28 @@ jobs: strategy: fail-fast: false matrix: - gpu: ["amd", "nvidia"] - precision: ["double", "single"] - runs-on: [self-hosted, "${{ matrix.gpu }}-gpu"] + device: [amd-gpu, nvidia-gpu] + precision: [double, single] + exclude: + - device: amd-gpu + precision: double + # my AMD GPUs doesn't support fp64 atomics : ( + runs-on: [self-hosted, "${{ matrix.device }}"] steps: - name: Checkout uses: actions/checkout@v3.3.0 - name: Configure run: | - if [ "${{ matrix.gpu }}" = "nvidia" ]; then + if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" - elif [ "${{ matrix.gpu }}" = "amd" ]; then + elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" + export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile run: | - cmake --build build -j $(exec nproc) + cmake --build build - name: Run tests run: | - ctest --test-dir build --output-on-failure --verbose \ No newline at end of file + ctest --test-dir build --output-on-failure --verbose From 81a9663b77829870bae1073fbda0ba3515a76973 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 04:44:45 -0400 Subject: [PATCH 25/28] amd runner --- .gitignore | 1 + dev/runners/Dockerfile.runner.rocm | 85 ++++++++++++++++++++++++++++++ dev/runners/README.md | 20 +++++-- 3 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 dev/runners/Dockerfile.runner.rocm diff --git a/.gitignore b/.gitignore index 76f69e364..20bfe33a3 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,4 @@ Testing/ .schema.json *_old/ +action-token diff --git a/dev/runners/Dockerfile.runner.rocm b/dev/runners/Dockerfile.runner.rocm new file mode 100644 index 000000000..97059c9b9 --- /dev/null +++ b/dev/runners/Dockerfile.runner.rocm @@ -0,0 +1,85 @@ +FROM rocm/rocm-terminal:latest + +USER root +ENV PATH=/opt/rocm/bin:$PATH + +ARG DEBIAN_FRONTEND=noninteractive +ENV DISPLAY=host.docker.internal:0.0 + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y sudo wget curl build-essential && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src + +ENV HDF5_ROOT=/usr +ENV ADIOS2_DIR=/opt/adios2 +ENV PATH=/opt/adios2/bin:$PATH + +# additional ROCm packages +RUN git clone -b release/rocm-rel-6.1.1.1 https://github.com/ROCm/rocThrust.git /opt/rocthrust-src && \ + git clone -b release/rocm-rel-6.1.00.36 https://github.com/ROCm/rocPRIM.git /opt/rocprim-src && \ + cd /opt/rocthrust-src && ./install --install && \ + cd /opt/rocprim-src && ./install --install && \ + rm -rf /opt/rocthrust-src /opt/rocprim-src + +ENV CMAKE_PREFIX_PATH=/opt/rocm +ENV CC=hipcc +ENV CXX=hipcc + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +ARG USER=runner +RUN useradd -ms /usr/bin/zsh $USER && \ + usermod -aG sudo $USER && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +USER $USER +ARG HOME=/home/$USER +WORKDIR $HOME + +# gh runner +RUN mkdir actions-runner +WORKDIR $HOME/actions-runner + +RUN --mount=type=secret,id=ghtoken \ + curl -o actions-runner-linux-x64-2.317.0.tar.gz \ + -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ + tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ + sudo ./bin/installdependencies.sh && \ + ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels amd-gpu + +ENTRYPOINT ["./run.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 526068bea..2b9a5b4f6 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -4,12 +4,24 @@ GitHub allows to listen to repository changes and run the so-called "actions" (e To do that, one needs to create an image with the corresponding `Dockerfile`, and then launch a Docker container which will run in the background, listening to commands and running any actions forwarded from the GitHub. +First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token, and save it to a file: + +```sh +echo ******TOKEN****** > action-token +``` + +Images differ slightly from the type of runner. + ### NVIDIA GPUs ```sh -# 1. Create the image -docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia . -# 2. Run a container from the image with GPU support -# ... (see wiki for instructions on NVIDIA runtime) +docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia --secret id=ghtoken,src=./action-token . docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia ``` + +### AMD GPUs + +```sh +docker build -t ghrunner:amd -f Dockerfile.runner.rocm --secret id=ghtoken,src=./action-token . +docker run --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd +``` From a86b33634f7a497d651fee89bf7579d18ed1a3ac Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 05:26:59 -0400 Subject: [PATCH 26/28] actions upd --- .github/workflows/actions.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index c639ca298..253e96ed1 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -27,12 +27,11 @@ jobs: FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" - export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile run: | - cmake --build build + cmake --build build -j $(nproc) - name: Run tests run: | ctest --test-dir build --output-on-failure --verbose From e478ccd9e4fff69b6f7b780d69ca4b963ebe2d79 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 16:53:58 -0400 Subject: [PATCH 27/28] actions updated --- .github/workflows/actions.yml | 9 +++++++- ...e.runner.nvidia => Dockerfile.runner.cuda} | 6 +++--- dev/runners/Dockerfile.runner.rocm | 17 ++++++++------- dev/runners/README.md | 16 +++++--------- dev/runners/start.sh | 21 +++++++++++++++++++ 5 files changed, 47 insertions(+), 22 deletions(-) rename dev/runners/{Dockerfile.runner.nvidia => Dockerfile.runner.cuda} (93%) create mode 100644 dev/runners/start.sh diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 253e96ed1..68a0a7f4a 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -24,7 +24,14 @@ jobs: - name: Configure run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then - FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" + FLAGS="-D Kokkos_ENABLE_CUDA=ON" + if [[ ! -z $(nvidia-smi | grep "V100" ]]; then + FLAGS+=" -D Kokkos_ARCH_VOLTA70=ON" + elif [[ ! -z $(nvidia-smi | grep "A100" ]]; then + FLAGS+=" -D Kokkos_ARCH_AMPERE80=ON" + else + FLAGS+=" -D Kokkos_ARCH_AMPERE86=ON" + fi elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" fi diff --git a/dev/runners/Dockerfile.runner.nvidia b/dev/runners/Dockerfile.runner.cuda similarity index 93% rename from dev/runners/Dockerfile.runner.nvidia rename to dev/runners/Dockerfile.runner.cuda index d7925ec6a..4ff132990 100644 --- a/dev/runners/Dockerfile.runner.nvidia +++ b/dev/runners/Dockerfile.runner.cuda @@ -59,14 +59,14 @@ ARG HOME=/home/$USER WORKDIR $HOME # gh runner -ARG TOKEN RUN mkdir actions-runner WORKDIR $HOME/actions-runner -RUN curl -o actions-runner-linux-x64-2.317.0.tar.gz \ +RUN --mount=type=secret,id=ghtoken \ + curl -o actions-runner-linux-x64-2.317.0.tar.gz \ -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ sudo ./bin/installdependencies.sh && \ - ./config.sh --url https://github.com/entity-toolkit/entity --token $TOKEN --labels nvidia-gpu + ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels nvidia-gpu ENTRYPOINT ["./run.sh"] diff --git a/dev/runners/Dockerfile.runner.rocm b/dev/runners/Dockerfile.runner.rocm index 97059c9b9..63579dcd5 100644 --- a/dev/runners/Dockerfile.runner.rocm +++ b/dev/runners/Dockerfile.runner.rocm @@ -72,14 +72,17 @@ ARG HOME=/home/$USER WORKDIR $HOME # gh runner +ARG RUNNER_VERSION=2.317.0 RUN mkdir actions-runner WORKDIR $HOME/actions-runner -RUN --mount=type=secret,id=ghtoken \ - curl -o actions-runner-linux-x64-2.317.0.tar.gz \ - -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ - tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ - sudo ./bin/installdependencies.sh && \ - ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels amd-gpu +RUN curl -o actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \ + -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + sudo ./bin/installdependencies.sh -ENTRYPOINT ["./run.sh"] +ADD start.sh start.sh +RUN sudo chown $USER:$USER start.sh && \ + sudo chmod +x start.sh + +ENTRYPOINT ["./start.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 2b9a5b4f6..08d0cd176 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -4,24 +4,18 @@ GitHub allows to listen to repository changes and run the so-called "actions" (e To do that, one needs to create an image with the corresponding `Dockerfile`, and then launch a Docker container which will run in the background, listening to commands and running any actions forwarded from the GitHub. -First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token, and save it to a file: - -```sh -echo ******TOKEN****** > action-token -``` - -Images differ slightly from the type of runner. +First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token to use it later. The images differ slightly from the type of runner. ### NVIDIA GPUs ```sh -docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia --secret id=ghtoken,src=./action-token . -docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia +docker build -t ghrunner:nvidia -f Dockerfile.runner.cuda . +docker run -e TOKEN= -e LABEL=nvidia-gpu --runtime=nvidia --gpus=all -dt ghrunner:nvidia ``` ### AMD GPUs ```sh -docker build -t ghrunner:amd -f Dockerfile.runner.rocm --secret id=ghtoken,src=./action-token . -docker run --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd +docker build -t ghrunner:amd -f Dockerfile.runner.rocm . +docker run -e TOKEN= -e LABEL=amd-gpu --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd ``` diff --git a/dev/runners/start.sh b/dev/runners/start.sh new file mode 100644 index 000000000..c44e26f16 --- /dev/null +++ b/dev/runners/start.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +./config.sh --unattended --url https://github.com/entity-toolkit/entity --token ${TOKEN} --labels ${LABEL} + +if [[ ${LABEL} == "amd-gpu" ]]; then + echo "AMD GPU runner detected" + export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 +else + echo "Non-AMD runner" +fi + +cleanup() { + echo "Removing runner..." + ./config.sh remove --unattended --token ${TOKEN} +} + +trap 'cleanup; exit 130' INT +trap 'cleanup; exit 143' TERM + +./run.sh & +wait $! From 1b22bf3292f7ebde54fdd74af3bfd13359d61b2e Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 16:55:10 -0400 Subject: [PATCH 28/28] actions updated --- .github/workflows/actions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 68a0a7f4a..04bc34050 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -25,9 +25,9 @@ jobs: run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then FLAGS="-D Kokkos_ENABLE_CUDA=ON" - if [[ ! -z $(nvidia-smi | grep "V100" ]]; then + if [[ ! -z $(nvidia-smi | grep "V100") ]]; then FLAGS+=" -D Kokkos_ARCH_VOLTA70=ON" - elif [[ ! -z $(nvidia-smi | grep "A100" ]]; then + elif [[ ! -z $(nvidia-smi | grep "A100") ]]; then FLAGS+=" -D Kokkos_ARCH_AMPERE80=ON" else FLAGS+=" -D Kokkos_ARCH_AMPERE86=ON"